|
|
1.1 root 1: ;******************************Module*Header*******************************\
2: ; Module Name: nalgnblt.asm
3: ;
4: ; driver prototypes
5: ;
6: ; Copyright (c) 1992 Microsoft Corporation
7: ;**************************************************************************/
8:
9: ;-----------------------------------------------------------------------;
10: ; VOID vNonAlignedSrcCopy(PDEVSURF pdsurf, RECTL * prcldest, POINTL * pptlsrc,
11: ; INT icopydir);
12: ; Input:
13: ; pdsurf - surface on which to copy
14: ; prcldest - pointer to destination rectangle
15: ; pptlsrc - pointer to source upper left corner
16: ; icopydir - direction in which copy must proceed to avoid overlap problems
17: ; and synchronize with the clip enumeration visually, according to
18: ; constants CD_RIGHTDOWN, CD_LEFTDOWN, CD_RIGHTUP, and CD_LEFTUP in
19: ; WINDDI.H
20: ;
21: ; Performs accelerated non-aligned SRCCOPY VGA-to-VGA blts.
22: ;
23: ;-----------------------------------------------------------------------;
24: ;
25: ; Note: The source and dest *must* be non-aligned (not have the same
26: ; left-edge intrabyte pixel alignment). Will not work properly if they are
27: ; in fact aligned.
28: ;
29: ; Note: Assumes all rectangles have positive heights and widths. Will not
30: ; work properly if this is not the case.
31: ;
32: ;-----------------------------------------------------------------------;
33:
34: comment $
35:
36: The overall approach of this module for each rectangle to copy is:
37:
38: 1) Precalculate the masks and whole byte widths, and determine which of
39: partial left edge, partial right edge, and whole middle bytes are required
40: for this copy.
41:
42: 2) Set up the starting pointers for each of the areas (left, whole middle,
43: right), the start and stop scan lines, the copying direction (left-to-right
44: or right-to-left, and top-to-bottom or bottom-to-top), and the threading
45: (sequence of calls required to do the left/whole/right components in the
46: proper sequence), all based on the passed-in copy direction, which in turn is
47: dictated by the nature of the overlap between the source and destination.
48:
49: 3) Execute a loop, based on adapter type (2 R/W windows, 1R/1W window,
50: 1 R/W window, unbanked), that sequences through the intersection of each
51: bank with the source and destination rectangles in the proper direction
52: (top-to-bottom or bottom-to-top, based on the passed-in copy direction),
53: and performs the copy in each such rectangle. The threading vector is used
54: to call the required routines (copy left/whole/right bytes). For 1 R/W and
55: 1R/1W adapters, there is a second threading vector that is called when the
56: source and the destination are both adequately (for the copy purposes)
57: addressable simultaneously (because they're in the same bank), so there's
58: no need to copy through a temp buffer. We want to avoid the temp
59: buffer whenever we can, because it's slower.
60:
61: Note: 1 R/W and 1R/1W edges are copied through a temporary buffer. However,
62: each plane's bytes are not stored in the corresponding plane's temp buffer, but
63: rather consecutively in the plane 0 temp buffer. This is to reduce page
64: faulting, and also so that 1R/1W adapters only need a temp buffer large enough
65: to hold 4*tallest bank words (4K will do). 1 R/W adapters still copy whole
66: bytes through the full temp buffer, using all four planes' temp buffers, so
67: they require a temp buffer big enough to hold a full bank (256K will do).
68:
69: Note: The VGA's rotator is used to perform all rotation in this module. The
70: two source bytes relevant to this operation are masked to preserve the desired
71: bits, then combined and fed to the VGA's rotator, which performs the rotation.
72: This is better than letting the 386/486 do the rotation because even with the
73: barrel shifter, those processors take 3 cycles per rotate, where the masking
74: and combining take only 2 cycles (or no cycles, for edges with 1-wide
75: sources). We also get to avoid 16-bit instructions like ROL AX,CL; the 16-bit
76: size prefix costs a cycle on a 486.
77:
78: commend $
79:
80: ;-----------------------------------------------------------------------;
81: ; Set LOOP_UNROLL_SHIFT to the log2 of the number of times you want loops in
82: ; this module unrolled. For example, LOOP_UNROLL_SHIFT of 3 yields 2**3 = 8
83: ; times unrolling. This is the only thing you need to change to control
84: ; unrolling. Note: does not affect loops that process in chunks, like edge
85: ; loops.
86:
87: LOOP_UNROLL_SHIFT equ 2 ;log2 of unroll factor: 2**2 = 4 times
88:
89: ;-----------------------------------------------------------------------;
90: ; Maximum # of edge bytes to process before switching to next plane. Larger
91: ; means faster, but there's more potential for flicker, since the raster scan
92: ; has a better chance of catching bytes that have changed in some planes but
93: ; not all planes.
94:
95: EDGE_CHUNK_SIZE equ 16 ;edge bytes handled per plane per chunk
96:
97: ;-----------------------------------------------------------------------;
98: ; Macro to push the current threading sequence (string of routine calls) on the
99: ; stack, then jump to the first threading entry. The threading pointer can be
100: ; specified, or defaults to pCurrentThread. The return address can be
101: ; immediately after the JMP, or can be specified. Clobbers EAX, ECX, and EDX.
102:
103: THREAD_AND_START macro THREADING,RETURN_ADDR
104: local push_base, return_address
105:
106: ifb <&RETURN_ADDR&>
107: push offset return_address ;after all the threaded routines, we
108: ; return here (just past this macro)
109: else
110: push offset &RETURN_ADDR& ;return to the address the caller named
111: endif
112:
113: ifb <&THREADING&>
114: mov eax,pCurrentThread ;default: thread for the current copy
115: else
116: mov eax,&THREADING& ;thread descriptor the caller named
117: endif
118:
119: mov ecx,[eax] ;# of routines to thread (at least 1)
120: lea ecx,[ecx*2+ecx] ;pushes below are 3 bytes each
121: mov edx,offset push_base+3 ;EDX = push_base + 3 - 3*count, so a
122: sub edx,ecx ; count of 1 skips both pushes
123: jmp edx ;branch to push or jmp below
124:
125: ; Push the threading addresses on to the stack, so routines perform the
126: ; threading as they return. Last call is pushed first, so it runs last.
127:
128: push dword ptr [eax+12] ;3 byte instruction; third call
129: push dword ptr [eax+8] ;3 byte instruction; second call
130: push_base:
131: jmp dword ptr [eax+4] ;jump to the first threaded routine
132:
133: align 4
134: return_address:
135: endm
136:
137: ;-----------------------------------------------------------------------;
138:
139: .386
140:
141: ifndef DOS_PLATFORM
142: .model small,c
143: else
144: ifdef STD_CALL
145: .model small,c
146: else
147: .model small,pascal
148: endif; STD_CALL
149: endif; DOS_PLATFORM
150:
151: assume cs:FLAT,ds:FLAT,es:FLAT,ss:FLAT
152: assume fs:nothing,gs:nothing
153:
154: .xlist
155: include stdcall.inc ;calling convention cmacros
156: include i386\egavga.inc
157: include i386\strucs.inc
158: include i386\unroll.inc
159: include i386\ropdefs.inc
160:
161: .list
162:
163: ;-----------------------------------------------------------------------;
164:
165: .data
166:
167: ; Threads for stringing together left, whole byte, and right operations
168: ; in various orders, both using a temp buffer and not. Data format is:
169: ;
170: ; DWORD +0 = # of calls in thread (1, 2, or 3)
171: ; +4 = first call (required)
172: ; +8 = second call (optional)
173: ; +12 = third call (optional)
174: ; In the names, L/W/R = left edge/whole bytes/right edge; b = via temp buffer.
175: align 4
176:
177: ; Copies not involving the temp buffer.
178:
179: Thread_L dd 1 ;left edge only
180: dd copy_left_edge
181:
182: Thread_W dd 1 ;whole bytes only
183: dd copy_whole_bytes
184:
185: Thread_R dd 1 ;right edge only
186: dd copy_right_edge
187:
188: Thread_LR dd 2 ;left edge, then right edge
189: dd copy_left_edge
190: dd copy_right_edge
191:
192: Thread_RL dd 2 ;right edge, then left edge
193: dd copy_right_edge
194: dd copy_left_edge
195:
196: Thread_LW dd 2 ;left edge, then whole bytes
197: dd copy_left_edge
198: dd copy_whole_bytes
199:
200: Thread_WL dd 2 ;whole bytes, then left edge
201: dd copy_whole_bytes
202: dd copy_left_edge
203:
204: Thread_WR dd 2 ;whole bytes, then right edge
205: dd copy_whole_bytes
206: dd copy_right_edge
207:
208: Thread_RW dd 2 ;right edge, then whole bytes
209: dd copy_right_edge
210: dd copy_whole_bytes
211:
212: Thread_LWR dd 3 ;left edge, whole bytes, right edge
213: dd copy_left_edge
214: dd copy_whole_bytes
215: dd copy_right_edge
216:
217: Thread_RWL dd 3 ;right edge, whole bytes, left edge
218: dd copy_right_edge
219: dd copy_whole_bytes
220: dd copy_left_edge
221:
222: ; Copies involving the temp buffer (for at least one of the calls).
223:
224: Thread_Lb dd 1 ;left edge via buffer
225: dd copy_left_edge_via_buffer
226:
227: Thread_Wb dd 1 ;whole bytes via buffer
228: dd copy_whole_bytes_via_buffer
229:
230: Thread_Rb dd 1 ;right edge via buffer
231: dd copy_right_edge_via_buffer
232:
233: Thread_LbRb dd 2 ;both edges via buffer, left first
234: dd copy_left_edge_via_buffer
235: dd copy_right_edge_via_buffer
236:
237: Thread_RbLb dd 2 ;both edges via buffer, right first
238: dd copy_right_edge_via_buffer
239: dd copy_left_edge_via_buffer
240:
241: Thread_LbW dd 2 ;left edge via buffer, whole bytes direct
242: dd copy_left_edge_via_buffer
243: dd copy_whole_bytes
244:
245: Thread_LbWb dd 2 ;left edge and whole bytes via buffer
246: dd copy_left_edge_via_buffer
247: dd copy_whole_bytes_via_buffer
248:
249: Thread_WLb dd 2 ;whole bytes direct, left edge via buffer
250: dd copy_whole_bytes
251: dd copy_left_edge_via_buffer
252:
253: Thread_WbLb dd 2 ;whole bytes and left edge via buffer
254: dd copy_whole_bytes_via_buffer
255: dd copy_left_edge_via_buffer
256:
257: Thread_WRb dd 2 ;whole bytes direct, right edge via buffer
258: dd copy_whole_bytes
259: dd copy_right_edge_via_buffer
260:
261: Thread_WbRb dd 2 ;whole bytes and right edge via buffer
262: dd copy_whole_bytes_via_buffer
263: dd copy_right_edge_via_buffer
264:
265: Thread_RbW dd 2 ;right edge via buffer, whole bytes direct
266: dd copy_right_edge_via_buffer
267: dd copy_whole_bytes
268:
269: Thread_RbWb dd 2 ;right edge and whole bytes via buffer
270: dd copy_right_edge_via_buffer
271: dd copy_whole_bytes_via_buffer
272:
273: Thread_LbWRb dd 3 ;edges via buffer, whole bytes direct
274: dd copy_left_edge_via_buffer
275: dd copy_whole_bytes
276: dd copy_right_edge_via_buffer
277:
278: Thread_LbWbRb dd 3 ;everything via buffer, left to right
279: dd copy_left_edge_via_buffer
280: dd copy_whole_bytes_via_buffer
281: dd copy_right_edge_via_buffer
282:
283: Thread_RbWLb dd 3 ;edges via buffer, whole bytes direct
284: dd copy_right_edge_via_buffer
285: dd copy_whole_bytes
286: dd copy_left_edge_via_buffer
287:
288: Thread_RbWbLb dd 3 ;everything via buffer, right to left
289: dd copy_right_edge_via_buffer
290: dd copy_whole_bytes_via_buffer
291: dd copy_left_edge_via_buffer
292:
293: ;-----------------------------------------------------------------------;
294: ; Table of thread selection for various horizontal copy directions, with
295: ; the look-up index a 4-bit field as follows:
296: ;
297: ; Bit 3 = 1 if left-to-right copy, 0 if right-to-left
298: ; Bit 2 = 1 if left edge must be copied
299: ; Bit 1 = 1 if whole bytes must be copied
300: ; Bit 0 = 1 if right edge must be copied
301: ;
302: ; This is used for all cases where both the source and destination are
303: ; simultaneously addressable for our purposes, so there's no need to go
304: ; through the temp buffer (unbanked, 2 R/W, and sometimes for 1 R/W and 1R/1W).
305: ; Each entry points to one of the Thread_xxx descriptors (dword entries).
306: MasterThreadTable label dword
307: ;right-to-left (bit 3 = 0)
308: dd 0 ;<not used> (no left, whole, or right)
309: dd Thread_R ;R->L, R
310: dd Thread_W ;R->L, W
311: dd Thread_RW ;R->L, RW
312: dd Thread_L ;R->L, L
313: dd Thread_RL ;R->L, RL
314: dd Thread_WL ;R->L, WL
315: dd Thread_RWL ;R->L, RWL
316: ;left-to-right (bit 3 = 1)
317: dd 0 ;<not used> (no left, whole, or right)
318: dd Thread_R ;L->R, R
319: dd Thread_W ;L->R, W
320: dd Thread_WR ;L->R, WR
321: dd Thread_L ;L->R, L
322: dd Thread_LR ;L->R, LR
323: dd Thread_LW ;L->R, LW
324: dd Thread_LWR ;L->R, LWR
325:
326:
327: ; Table of thread selection for various adapter types and horizontal
328: ; copy directions, with the look-up index a 6-bit field as follows:
329: ;
330: ; Bit 5 = adapter type high bit
331: ; Bit 4 = adapter type low bit
332: ; Bit 3 = 1 if left-to-right copy, 0 if right-to-left
333: ; Bit 2 = 1 if left edge must be copied
334: ; Bit 1 = 1 if whole bytes must be copied
335: ; Bit 0 = 1 if right edge must be copied
336: ;
337: ; This is used for all cases where the source and destination are not both
338: ; simultaneously addressable for our purposes, so we need to go through the
339: ; temp buffer (only for 1 R/W and 1R/1W, and only sometimes).
340: ; The four 16-entry groups are in the same adapter order as the loop tables.
341: MasterThreadTableViaBuffer label dword
342: ;unbanked (no need for buffer)
343: ;right-to-left
344: dd 0 ;<not used>
345: dd Thread_R ;R->L, R
346: dd Thread_W ;R->L, W
347: dd Thread_RW ;R->L, RW
348: dd Thread_L ;R->L, L
349: dd Thread_RL ;R->L, RL
350: dd Thread_WL ;R->L, WL
351: dd Thread_RWL ;R->L, RWL
352: ;left-to-right
353: dd 0 ;<not used>
354: dd Thread_R ;L->R, R
355: dd Thread_W ;L->R, W
356: dd Thread_WR ;L->R, WR
357: dd Thread_L ;L->R, L
358: dd Thread_LR ;L->R, LR
359: dd Thread_LW ;L->R, LW
360: dd Thread_LWR ;L->R, LWR
361:
362: ;1 R/W banking window (everything goes through
363: ; buffer)
364: ;right-to-left
365: dd 0 ;<not used>
366: dd Thread_Rb ;R->L, R
367: dd Thread_Wb ;R->L, W
368: dd Thread_RbWb ;R->L, RW
369: dd Thread_Lb ;R->L, L
370: dd Thread_RbLb ;R->L, RL
371: dd Thread_WbLb ;R->L, WL
372: dd Thread_RbWbLb ;R->L, RWL
373: ;left-to-right
374: dd 0 ;<not used>
375: dd Thread_Rb ;L->R, R
376: dd Thread_Wb ;L->R, W
377: dd Thread_WbRb ;L->R, WR
378: dd Thread_Lb ;L->R, L
379: dd Thread_LbRb ;L->R, LR
380: dd Thread_LbWb ;L->R, LW
381: dd Thread_LbWbRb ;L->R, LWR
382:
383: ;1R/1W banking window (edges go through buffer,
384: ; whole bytes are copied directly); right-to-left
385: dd 0 ;<not used>
386: dd Thread_Rb ;R->L, R
387: dd Thread_W ;R->L, W
388: dd Thread_RbW ;R->L, RW
389: dd Thread_Lb ;R->L, L
390: dd Thread_RbLb ;R->L, RL
391: dd Thread_WLb ;R->L, WL
392: dd Thread_RbWLb ;R->L, RWL
393: ;left-to-right
394: dd 0 ;<not used>
395: dd Thread_Rb ;L->R, R
396: dd Thread_W ;L->R, W
397: dd Thread_WRb ;L->R, WR
398: dd Thread_Lb ;L->R, L
399: dd Thread_LbRb ;L->R, LR
400: dd Thread_LbW ;L->R, LW
401: dd Thread_LbWRb ;L->R, LWR
402:
403: ;2 R/W banking window (no need for buffer)
404: ;right-to-left
405: dd 0 ;<not used>
406: dd Thread_R ;R->L, R
407: dd Thread_W ;R->L, W
408: dd Thread_RW ;R->L, RW
409: dd Thread_L ;R->L, L
410: dd Thread_RL ;R->L, RL
411: dd Thread_WL ;R->L, WL
412: dd Thread_RWL ;R->L, RWL
413: ;left-to-right
414: dd 0 ;<not used>
415: dd Thread_R ;L->R, R
416: dd Thread_W ;L->R, W
417: dd Thread_WR ;L->R, WR
418: dd Thread_L ;L->R, L
419: dd Thread_LR ;L->R, LR
420: dd Thread_LW ;L->R, LW
421: dd Thread_LWR ;L->R, LWR
422:
423:
424: ; Amount to shift adapter type field left for use in MasterThreadTableViaBuffer.
425: ; (The low 4 index bits are the direction and left/whole/right flags.)
426: ADAPTER_FIELD_SHIFT equ 4
427:
428: ; Mask for setting left-to-right bit to "left-to-right true" for use in both
429: ; MasterThread tables (bit 3 of the look-up index).
430:
431: LEFT_TO_RIGHT_FIELD_SET equ 1000b
432:
433:
434: ; Table of top-to-bottom loops for adapter types, indexed by adapter type.
435:
436: align 4
437: TopToBottomLoopTable label dword
438: dd top_to_bottom_2RW ;unbanked is same as 2RW
439: dd top_to_bottom_1RW ;1 R/W banking window
440: dd top_to_bottom_1R1W ;1R/1W banking window
441: dd top_to_bottom_2RW ;2 R/W banking window
442:
443:
444: ; Table of bottom-to-top loops for adapter types, indexed by adapter type.
445:
446: align 4
447: BottomToTopLoopTable label dword
448: dd bottom_to_top_2RW ;unbanked is same as 2RW
449: dd bottom_to_top_1RW ;1 R/W banking window
450: dd bottom_to_top_1R1W ;1R/1W banking window
451: dd bottom_to_top_2RW ;2 R/W banking window
452:
453:
454: ; Table of routines for setting up to copy in various directions, indexed
455: ; by the icopydir parameter (copy_rect jumps through this table).
456: align 4
457: SetUpForCopyDirection label dword
458: dd left_to_right_top_to_bottom ;CD_RIGHTDOWN
459: dd right_to_left_top_to_bottom ;CD_LEFTDOWN
460: dd left_to_right_bottom_to_top ;CD_RIGHTUP
461: dd right_to_left_bottom_to_top ;CD_LEFTUP
462:
463: ;-----------------------------------------------------------------------;
464: ; Left edge clip masks for intrabyte start addresses 0 through 7.
465: ; Whole byte cases are flagged as 0ffh. Indexed by (dest left edge AND 7).
466:
467: jLeftMaskTable label byte
468: db 0ffh,07fh,03fh,01fh,00fh,007h,003h,001h
469:
470: ;-----------------------------------------------------------------------;
471: ; Right edge clip masks for intrabyte end addresses (non-inclusive)
472: ; 0 through 7. Whole byte cases are flagged as 0ffh. Indexed by
473: ; (dest right edge AND 7).
474: jRightMaskTable label byte
475: db 0ffh,080h,0c0h,0e0h,0f0h,0f8h,0fch,0feh
476:
477: ;-----------------------------------------------------------------------;
478: ; Table of width-based source-edge-to-buffer copy routines.
479: ; NOTE(review): presumably indexed by source edge width minus 1 - confirm.
480: align 4
481: copy_edge_from_screen_to_buffer label dword
482: dd copy_screen_to_buffered_edge_1ws
483: dd copy_screen_to_buffered_edge_2ws
484:
485: ;-----------------------------------------------------------------------;
486: ; Table of width-based buffer-to-dest-edge copy routines.
487: ; NOTE(review): presumably indexed by source edge width minus 1 - confirm.
488: align 4
489: copy_edge_from_buffer_to_screen label dword
490: dd copy_buffered_edge_to_screen_1ws
491: dd copy_buffered_edge_to_screen_2ws
492:
493: ;-----------------------------------------------------------------------;
494: ; Table of width-based edge copy routines (no intermediate buffer).
495: ; NOTE(review): presumably indexed by source edge width minus 1 - confirm.
496: align 4
497: copy_edge_table label dword
498: dd copy_edge_1ws
499: dd copy_edge_2ws
500:
501: ;-----------------------------------------------------------------------;
500:
501: ;-----------------------------------------------------------------------;
502:
503: .code
504:
505: _TEXT$04 SEGMENT DWORD USE32 PUBLIC 'CODE'
506: ASSUME CS:FLAT, DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING
507:
508: ;-----------------------------------------------------------------------;
509:
510: cProc vNonAlignedSrcCopy,16,< \
511: uses esi edi ebx, \
512: pdsurf: ptr DEVSURF, \
513: prcldest : ptr RECTL, \
514: pptlsrc : ptr POINTL, \
515: icopydir : dword >
516:
517: local culWholeBytesWidth : dword ;# of bytes to copy across each scan
518: local ulBlockHeight : dword ;# of scans to copy per bank block
519: local ulWholeScanDelta : dword;offset from end of one whole bytes
520: ; scan to start of next
521: local ulWholeBytesSrc : dword ;offset in bitmap of first source whole
522: ; byte to copy from
523: local ulWholeBytesDest : dword;offset in bitmap of first dest whole
524: ; byte to copy to
525: local ulLeftEdgeSrc : dword ;offset in bitmap of first source left
526: ; edge byte to copy from
527: local ulLeftEdgeDest : dword ;offset in bitmap of first dest left
528: ; edge byte to copy to
529: local ulRightEdgeSrc : dword ;offset in bitmap of first source right
530: ; edge byte to copy from
531: local ulRightEdgeDest : dword ;offset in bitmap of first dest right
532: ; edge byte to copy to
533: local ulNextScan : dword ;width of scan, in bytes
534: local jLeftMask : dword ;left edge clip mask (low byte used)
535: local jRightMask : dword ;right edge clip mask (low byte used)
536: local culTempCount : dword ;handy temporary counter
537: local pTempEntry : dword ;temporary storage for vector into
538: ; unrolled loop
539: local pTempPlane : dword ;pointer to storage in temp buffer for
540: ; edge bytes (which are stored
541: ; consecutively, not in each plane's
542: ; temp buffer, to reduce possible page
543: ; faulting)
544: local ppTempPlane0 : dword ;pointer to pointer to storage in temp
545: ; buffer for plane 0, immediately
546: ; preceded by storage for planes 1, 2,
547: ; and 3
548: local ppTempPlane3 : dword ;like above, but for plane 3
549: local ulOffsetInBank : dword ;offset relative to bank start
550: local pSrcAddr : dword ;working pointer to first source
551: ; byte to copy from
552: local pDestAddr : dword ;working pointer to first dest
553: ; byte to copy to
554: local ulCurrentJustification:dword ;justification used to map in
555: ; banks; top for top to bottom
556: ; copies, bottom for bottom to top
557: local ulCurrentSrcScan :dword ;scan line used to map in current
558: ; source bank
559: local ulCurrentDestScan:dword ;scan line used to map in current dest
560: ; bank
561: local ulLastDestScan :dword ;scan in target rect at which we stop
562: ; advancing through banks
563: local pCurrentThread : dword ;pointer to data describing the
564: ; threaded calls to be performed to
565: ; perform the current copy
566: local pCurrentThreadViaBuffer:dword
567: ;pointer to data describing the
568: ; threaded calls to be performed to
569: ; perform the current copy in the case
570: ; where the source and destination are
571: ; not simultaneously adequately
572: ; accessible, so the copy has to go
573: ; through a temp buffer (used only for
574: ; 1 R/W and 1R/1W banking)
575: local ulAdapterType : dword ;adapter type code, per VIDEO_BANK_TYPE
576: local ulLWRType : dword ;whether left edge, whole bytes, and
577: ; right edge are involved in the
578: ; current operation;
579: ; bit 2 = 1 if left edge involved
580: ; bit 1 = 1 if whole bytes involved
581: ; bit 0 = 1 if right edge involved
582: local ulLeftEdgeAdjust :dword ;used to bump the whole bytes start
583: ; address past the left edge when the
584: ; left edge is partial
585: local ulCombineMask : dword ;mask for combining desired portions
586: ; of AL and AH before ORing to make a
587: ; single byte; used to combine before
588: ; letting VGA rotate byte as it's
589: ; written. Used for all cases except
590: ; whole bytes copied left-to-right
591: local ulCombineMaskWhole : dword
592: ;mask for combining desired portions of
593: ; AL and AH when copying whole bytes
594: ; (different from ulCombineMask in the
595: ; case of whole bytes left-to-right
596: ; copies, because then AH is the lsb
597: ; and AL is the MSB; then, this is
598: ; ulCombineMask with the bytes swapped.
599: ; For right-to-left whole byte copies,
600: ; this is the same as ulCombineMask)
601: local ulTempScanCount : dword ;temp scan line countdown variable
602: local ulWholeScanSrcDelta : dword
603: ;offset from end of one source whole
604: ; bytes scan line to start of next.
605: ; Differs from ulWholeScanDelta because
606: ; of source rotation pipeline priming
607: local ulLeftSrcWidthMinus1 : dword ;# of bytes in left src edge minus
608: ; one (0 or 1)
609: local ulRightSrcWidthMinus1 : dword ;# of bytes in right src edge
610: ; minus one (0 or 1)
611:
612: ;-----------------------------------------------------------------------;
613:
614: ; Set pointers to temp buffer plane pointers (used only by 1 R/W and 1R/1W
615: ; adapters), and other rectangle-independent variables.
616:
617: mov esi,pdsurf ;ESI -> surface descriptor
618: mov eax,[esi].dsurf_pvBankBufferPlane0
619: mov pTempPlane,eax ;edge bytes are staged in plane 0's buffer
620: lea eax,[esi].dsurf_pvBankBufferPlane0
621: mov ppTempPlane0,eax ;address of the plane 0 buffer pointer
622: lea eax,[esi].dsurf_pvBankBufferPlane3
623: mov ppTempPlane3,eax ;address of the plane 3 buffer pointer
624:
625: mov eax,[esi].dsurf_vbtBankingType
626: mov ulAdapterType,eax ;index into the per-adapter tables
627:
628: ; Copy the rectangle. copy_rect and the routines it jumps to share this
629: ; procedure's stack frame, so they can use the locals declared above.
630: call copy_rect
631:
632: ;-----------------------------------------------------------------------;
633: ; Set the VGA registers back to their default state.
634: ;-----------------------------------------------------------------------;
635:
636: mov edx,VGA_BASE + GRAF_ADDR
637: mov eax,(0ffh shl 8) + GRAF_BIT_MASK
638: out dx,ax ;enable bit mask for all bits
639:
640: mov eax,(DR_SET shl 8) + GRAF_DATA_ROT
641: out dx,ax ;restore default of no rotation
642:
643: mov dl,SEQ_DATA ;assumes SEQ_DATA shares GRAF_ADDR's high port
644: mov al,MM_ALL ; byte and Sequencer Index is Map Mask - confirm
645: out dx,al ;enable writes to all planes
646:
647: cld ;restore default direction flag
648:
649: cRet vNonAlignedSrcCopy ;done
650:
651:
652: ;***********************************************************************;
653: ;
654: ; Copies the specified rectangle (prcldest/pptlsrc), using the caller's frame.
655: ;
656: ;***********************************************************************;
657:
658: align 4
659: copy_rect:
660:
661: ; Calculate the rotation, set up the VGA's rotator, and set the byte-combining
662: ; masks.
663:
664: mov edi,prcldest ;EDI -> destination rectangle
665: mov esi,pptlsrc ;ESI -> source upper left point
666: mov ah,byte ptr [edi].xLeft ;low byte of dest left edge
667: sub ah,byte ptr [esi].ptl_x ;minus low byte of source left edge
668: and ah,07h ;rotation = (dest - source) % 8
669: mov edx,VGA_BASE + GRAF_ADDR
670: mov al,GRAF_DATA_ROT ;AL = register index, AH = rotate count
671: out dx,ax ;set the VGA's rotator for the rotation
672:
673: ; Set up byte-combining mask, in preparation for ORing and letting the VGA's
674: ; rotator rotate, assuming the left-hand source byte is in AL and the
675: ; right-hand source byte is in AH (true for all cases except left-to-right
676: ; whole bytes).
677:
678: mov cl,ah ;CL = rotation count (0-7)
679: mov eax,0000ff00h
680: rol ax,cl ;e.g. a rotation of 1 gives 0fe01h
681: mov ulCombineMask,eax ;upper 16 bits are always zero
682:
683: ; Calculate source edge widths (1 or 2 bytes).
684:
685: sub edx,edx ;assume left source width is 1
686: mov ebx,[edi].xLeft
687: mov ecx,[edi].xRight ;dest right edge (non-inclusive)
688: dec ecx ;make it inclusive
689: sub ecx,ebx ;dest width - 1 = dest right - dest left
690: mov eax,[esi].ptl_x
691: add ecx,eax ;ECX = right edge of source (inclusive)
692: xor eax,ecx
693: and eax,not 07h ;do the src start and end differ in byte
694: ; address bits? (as opposed to intrabyte)
695: jz short @F ;no, so left source width stays 1
696:
697: mov al,byte ptr [edi].xLeft
698: mov ah,byte ptr [esi].ptl_x
699: and eax,00000707h ;AL = dest, AH = src intrabyte address
700: cmp ah,al ;src intrabyte address below dest's?
701: jb short @F ;yes, left source width is 1
702: inc edx ;left source width is 2
703: @@:
704: mov ulLeftSrcWidthMinus1,edx
705:
706: sub edx,edx ;assume right source width is 1
707: mov eax,[edi].xRight ;dest right edge (non-inclusive)
708: dec eax ;make it inclusive
709: and cl,07h ;intrabyte source address (right edge)
710: and al,07h ;intrabyte dest address (right edge)
711: cmp cl,al ;src intrabyte address above dest's?
712: ja short @F ;yes, right source width is 1
713: inc edx ;right source width is 2
714: @@:
715: mov ulRightSrcWidthMinus1,edx
716:
717: ; Set up masks and whole bytes count, and build left/whole/right index
718: ; indicating which of those parts are involved in the copy.
719:
720: mov ebx,[edi].xRight ;right edge of dest (non-inclusive)
721: mov ecx,ebx
722: and ecx,0111b ;intrabyte address of right edge
723: mov ah,jRightMaskTable[ecx] ;right edge mask
724:
725: mov esi,[edi].xLeft ;left edge of dest (inclusive)
726: mov ecx,esi
727: shr ecx,3 ;/8 for start offset from left edge
728: ; of scan line (ECX is zeroed below before use)
729: sub ebx,esi ;width in pixels of dest
730:
731: and esi,0111b ;intrabyte address of left edge
732: mov al,jLeftMaskTable[esi] ;left edge mask
733:
734: dec ebx ;make inclusive on right
735: add ebx,esi ;inclusive width, starting counting at
736: ; the beginning of the left edge byte
737: shr ebx,3 ;width of dest in bytes touched - 1
738: jnz short more_than_1_byte ;more than 1 byte is involved
739:
740: ; Only one byte will be affected. Combine first/last masks.
741:
742: and al,ah ;we'll use first byte mask only
743: xor ah,ah ;want last byte mask to be 0 to
744: ; indicate right edge not involved
745: inc ebx ;so there's one count to subtract below
746: ; if this isn't a whole edge byte
747: more_than_1_byte:
748:
749: ; If all pixels in the left edge are altered, combine the first byte into the
750: ; whole byte count, because we can handle solid edge bytes faster as part of
751: ; the whole bytes. Ditto for the right edge.
752:
753: sub ecx,ecx ;edge whole-status accumulator
754: cmp al,-1 ;is left edge a whole byte or partial?
755: adc ecx,ecx ;ECX=1 if left edge partial, 0 if whole
756: sub ebx,ecx ;if left edge partial, deduct it from
757: ; the whole bytes count
758: mov ulLeftEdgeAdjust,ecx ;for skipping over the left edge if
759: ; it's partial when pointing to the
760: ; whole bytes
761: and ah,ah ;is right edge mask 0, meaning this
762: ; copy is only 1 byte wide?
763: jz short save_masks ;yes, no need to do anything
764: or ecx,40h ;assume there's a partial right edge
765: cmp ah,-1 ;is right edge a whole byte or partial?
766: jnz short save_masks ;partial
767: ;right edge is a whole byte
768: inc ebx ;if right edge whole, include it in the
769: ; whole bytes count
770: and ecx,not 40h ;there's no partial right edge
771: save_masks:
772: cmp ebx,1 ;do we have any whole bytes?
773: cmc ;CF set if whole byte count > 0
774: adc ecx,ecx ;if any whole bytes, set whole bytes
775: ; bit in left/whole/right accumulator
776: rol cl,1 ;bit 2 = left, bit 1 = whole, bit 0 = right
777: mov ulLWRType,ecx ;save left/whole/right status
778:
779: mov byte ptr jLeftMask,al ;save left and right clip masks
780: mov byte ptr jRightMask,ah
781: mov culWholeBytesWidth,ebx ;save # of whole bytes
782:
783: ; Copy the rectangle in the specified direction.
784:
785: mov eax,icopydir ;CD_xxx code indexes the set-up table
786: jmp SetUpForCopyDirection[eax*4]
787:
788:
789: ;***********************************************************************;
790: ;
791: ; The following routines set up to handle the four possible copy
792: ; directions.
793: ;
794: ;***********************************************************************;
795:
796:
797: ;-----------------------------------------------------------------------;
798: ; Set-up code for left-to-right, top-to-bottom copies.
799: ;-----------------------------------------------------------------------;
800:
801: align 4
802: left_to_right_top_to_bottom:
803:
804: cld ;we'll copy left to right
805:
806: ; Byte-combining mask, in preparation for ORing and letting the VGA's rotator
807: ; rotate, assuming the left-hand source byte is in AH and the right-hand source
808: ; byte is in AL (true only for left-to-right whole bytes).
809:
810: mov eax,ulCombineMask
811: not eax ;inverts which bits come from AL vs AH
812: mov ulCombineMaskWhole,eax
813:
814: mov esi,pdsurf
815: mov eax,[esi].dsurf_lNextScan
816: mov ulNextScan,eax ;copy top to bottom
817: sub eax,culWholeBytesWidth ;offset from end of one dest whole byte
818: mov ulWholeScanDelta,eax ; scan to start of next
819: dec eax ;offset from end of one src whole byte
820: mov ulWholeScanSrcDelta,eax ; scan to start of next, accounting for
821: ; leading byte used to prime the
822: ; rotation pipeline
823:
824: mov esi,ulLWRType ;3-bit flag field for left, whole, and
825: ; right involvement in operation
826: or esi,LEFT_TO_RIGHT_FIELD_SET ;add left-to-right into the index
827: mov eax,MasterThreadTable[esi*4]
828: mov pCurrentThread,eax ;threading when no buffering is needed
829: mov edx,ulAdapterType
830: shl edx,ADAPTER_FIELD_SHIFT ;move adapter type above the low 4 bits
831: or esi,edx ;factor adapter type into the index
832: mov eax,MasterThreadTableViaBuffer[esi*4]
833: mov pCurrentThreadViaBuffer,eax ;threading when buffering is needed
834:
835: mov ulCurrentJustification,JustifyTop ;copy top to bottom
836:
837: mov esi,prcldest
838: mov eax,[esi].yBottom
839: mov ulLastDestScan,eax ;end at bottom of dest copy rect
840: mov eax,[esi].yTop
841: mov ulCurrentDestScan,eax ;start at top of dest copy rect
842: mul ulNextScan ;offset in bitmap of top dest rect scan
843: mov edx,[esi].xLeft ;(MUL overwrote EDX; reload it here)
844: shr edx,3 ;byte X address
845: add eax,edx ;offset in bitmap of first dest byte
846: mov ulLeftEdgeDest,eax ;that's where the left dest edge is
847: add eax,ulLeftEdgeAdjust ;the whole bytes start at the next
848: ; byte, unless the left edge is a whole
849: ; byte and is thus part of the whole
850: ; bytes already
851: mov ulWholeBytesDest,eax ;where the whole dest bytes start
852: add eax,culWholeBytesWidth ;point to the right edge
853: mov ulRightEdgeDest,eax ;where the right dest edge starts
854:
855: mov esi,pptlsrc
856: mov eax,[esi].ptl_y
857: mov ulCurrentSrcScan,eax ;start at top of source copy rect
858: mul ulNextScan ;offset in bitmap of top source rect scan
859: mov edx,[esi].ptl_x ;(MUL overwrote EDX; reload it here)
860: shr edx,3 ;byte X address
861: add eax,edx ;offset in bitmap of first source byte
862: mov ulLeftEdgeSrc,eax ;that's where the left src edge is
863: add eax,ulLeftSrcWidthMinus1 ;the first whole byte includes the
864: dec eax ; last (rightmost) left edge byte, so
865: add eax,ulLeftEdgeAdjust ; add a byte if the left edge is 2
866: ; wide, except when the left dest byte
867: ; is solid so the left edge is part of
868: ; the whole bytes
869: mov ulWholeBytesSrc,eax ;where the src whole bytes start
870: add eax,culWholeBytesWidth ;point to the right edge
871: mov ulRightEdgeSrc,eax ;where the right src edge starts,
872: ; because the whole bytes and the right
873: ; source edge share a byte, and we
874: ; always point to the leftmost byte in
875: ; the right source edge
876:
877: ; Branch to the appropriate top-to-bottom bank enumeration loop.
878:
879: mov eax,ulAdapterType ;adapter type indexes the loop table
880: jmp TopToBottomLoopTable[eax*4]
881:
882:
883: ;-----------------------------------------------------------------------;
884: ; Set-up code for right-to-left, top-to-bottom copies.
885: ;-----------------------------------------------------------------------;
886: ; Stores the scan deltas, threading pointers and start offsets, then dispatches on ulAdapterType.
887: align 4
888: right_to_left_top_to_bottom:
889:
890: std ;we'll copy right to left (direction flag set)
891:
892: ; Byte-combining mask, in preparation for ORing and letting the VGA's rotator
893: ; rotate, assuming the left-hand source byte is in AL and the right-hand source
894: ; byte is in AH (always true except for left-to-right whole bytes).
895:
896: mov eax,ulCombineMask
897: mov ulCombineMaskWhole,eax ;mask is used as is here (not inverted)
898:
899: mov esi,pdsurf
900: mov eax,[esi].dsurf_lNextScan
901: mov ulNextScan,eax ;copy top to bottom
902: add eax,culWholeBytesWidth ;offset from end of one whole byte scan
903: mov ulWholeScanDelta,eax ; to start of next, given that we're
904: ; copying one way and going scan-to-
905: ; scan the other way
906: inc eax ;offset from end of one src whole byte
907: mov ulWholeScanSrcDelta,eax ; scan to start of next, accounting for
908: ; leading byte used to prime the
909: ; rotation pipeline
910:
911: mov esi,ulLWRType ;3-bit flag field for left, whole, and
912: ; right involvement in operation
913: ;leave left-to-right field cleared, so
914: ; we look up right-to-left entries
915: mov eax,MasterThreadTable[esi*4]
916: mov pCurrentThread,eax ;threading when no buffering is needed
917: mov edx,ulAdapterType
918: shl edx,ADAPTER_FIELD_SHIFT
919: or esi,edx ;factor adapter type into the index
920: mov eax,MasterThreadTableViaBuffer[esi*4]
921: mov pCurrentThreadViaBuffer,eax ;threading when buffering is needed
922:
923: mov ulCurrentJustification,JustifyTop ;copy top to bottom
924:
925: mov esi,prcldest
926: mov eax,[esi].yBottom
927: mov ulLastDestScan,eax ;end at bottom of dest copy rect
928: mov eax,[esi].yTop
929: mov ulCurrentDestScan,eax ;start at top of dest copy rect
930: mul ulNextScan ;offset in bitmap of top dest rect scan (MUL also overwrites EDX)
931: mov edx,[esi].xLeft
932: shr edx,3 ;byte X address
933: add eax,edx ;offset in bitmap of first dest byte
934: mov ulLeftEdgeDest,eax ;that's where the left dest edge is
935: add eax,ulLeftEdgeAdjust ;the whole bytes start at the next
936: ; byte, unless the left edge is a whole
937: ; byte and is thus part of the whole
938: ; bytes already
939: add eax,culWholeBytesWidth ;point to the right edge
940: mov ulRightEdgeDest,eax ;where the right dest edge starts
941: dec eax ;back up to the last whole byte
942: mov ulWholeBytesDest,eax ;where the whole dest bytes start
943:
944: mov esi,pptlsrc
945: mov eax,[esi].ptl_y
946: mov ulCurrentSrcScan,eax ;start at top of source copy rect
947: mul ulNextScan ;offset in bitmap of top source rect scan (MUL also overwrites EDX)
948: mov edx,[esi].ptl_x
949: shr edx,3 ;byte X address
950: add eax,edx ;offset in bitmap of first source byte
951: mov ulLeftEdgeSrc,eax ;that's where the left src edge is
952: add eax,ulLeftSrcWidthMinus1 ;the first whole byte includes the
953: dec eax ; last (leftmost) left edge byte, so
954: add eax,ulLeftEdgeAdjust ; add a byte if the left edge is 2
955: ; wide, except when the left dest byte
956: ; is solid so the left edge is part of
957: ; the whole bytes
958: add eax,culWholeBytesWidth ;point to the right edge of the whole
959: ; src bytes, accounting for the extra
960: ; source byte needed to prime the
961: ; rotation pipeline
962: mov ulWholeBytesSrc,eax ;where the src whole bytes start
963: mov ulRightEdgeSrc,eax ;that's also where the right src edge
964: ; starts, because the whole bytes and
965: ; the right source edge share a byte,
966: ; and we always point to the leftmost
967: ; byte in the right source edge
968:
969: ; Branch to the appropriate top-to-bottom bank enumeration loop.
970:
971: mov eax,ulAdapterType ;adapter type indexes the dword jump table
972: jmp TopToBottomLoopTable[eax*4]
973:
974:
975: ;-----------------------------------------------------------------------;
976: ; Set-up code for left-to-right, bottom-to-top copies.
977: ;-----------------------------------------------------------------------;
978: ; Stores the scan deltas, threading pointers and start offsets, then dispatches on ulAdapterType.
979: align 4
980: left_to_right_bottom_to_top:
981:
982: cld ;we'll copy left to right (direction flag cleared)
983:
984: ; Byte-combining mask, in preparation for ORing and letting the VGA's rotator
985: ; rotate, assuming the left-hand source byte is in AH and the right-hand source
986: ; byte is in AL (true only for left-to-right whole bytes).
987:
988: mov eax,ulCombineMask
989: not eax ;whole-byte mask is the complement of ulCombineMask here
990: mov ulCombineMaskWhole,eax
991:
992: mov edi,pdsurf
993: mov eax,[edi].dsurf_lNextScan
994: neg eax ;negated scan width, since we step upward
995: mov ulNextScan,eax ;copy bottom to top
996: sub eax,culWholeBytesWidth ;offset from end of one whole byte scan
997: mov ulWholeScanDelta,eax ; to start of next, given that we're
998: ; copying one way and going scan-to-
999: ; scan the other way
1000: dec eax ;offset from end of one src whole byte
1001: mov ulWholeScanSrcDelta,eax ; scan to start of next, accounting for
1002: ; leading byte used to prime the
1003: ; rotation pipeline
1004:
1005: mov esi,ulLWRType ;3-bit flag field for left, whole, and
1006: ; right involvement in operation
1007: or esi,LEFT_TO_RIGHT_FIELD_SET ;add left-to-right into the index
1008: mov eax,MasterThreadTable[esi*4]
1009: mov pCurrentThread,eax ;threading when no buffering is needed
1010: mov edx,ulAdapterType
1011: shl edx,ADAPTER_FIELD_SHIFT
1012: or esi,edx ;factor adapter type into the index
1013: mov eax,MasterThreadTableViaBuffer[esi*4]
1014: mov pCurrentThreadViaBuffer,eax ;threading when buffering is needed
1015:
1016: mov ulCurrentJustification,JustifyBottom ;copy bottom to top
1017:
1018: mov esi,prcldest
1019: mov edx,[esi].yTop
1020: mov ulLastDestScan,edx ;end at top of dest copy rect
1021: mov eax,[esi].yBottom
1022: dec eax ;rectangle definition is non-inclusive,
1023: ; so advance to first scan we'll copy
1024: sub edx,eax ;-(offset from rect top to bottom)
1025: push edx ;remember for use with source (the MUL below overwrites EDX)
1026: mov ulCurrentDestScan,eax ;start at bottom of dest copy rect
1027: mul [edi].dsurf_lNextScan ;offset in bitmap of bottom dest rect
1028: ; scan (first scan to which to copy); un-negated scan width is used
1029: mov edx,[esi].xLeft
1030: shr edx,3 ;byte X address
1031: add eax,edx ;offset in bitmap of first dest byte
1032: mov ulLeftEdgeDest,eax ;that's where the left dest edge is
1033: add eax,ulLeftEdgeAdjust ;the whole bytes start at the next
1034: ; byte, unless the left edge is a whole
1035: ; byte and is thus part of the whole
1036: ; bytes already
1037: mov ulWholeBytesDest,eax ;where the whole dest bytes start
1038: add eax,culWholeBytesWidth ;point to the right edge
1039: mov ulRightEdgeDest,eax ;where the right dest edge starts
1040:
1041: mov esi,pptlsrc
1042: mov eax,[esi].ptl_y
1043: pop edx ;retrieve -(offset from top to bottom)
1044: sub eax,edx ;advance to bottom of source rect
1045: ; (inclusive; this is first scan from
1046: ; which to copy)
1047: mov ulCurrentSrcScan,eax ;start at bottom of source copy rect
1048: mul [edi].dsurf_lNextScan ;offset in bitmap of bottom source rect
1049: ; scan (first scan from which to copy)
1050: mov edx,[esi].ptl_x
1051: shr edx,3 ;byte X address
1052: add eax,edx ;offset in bitmap of first source byte
1053: mov ulLeftEdgeSrc,eax ;that's where the left src edge is
1054: add eax,ulLeftSrcWidthMinus1 ;the first whole byte includes the
1055: dec eax ; last (leftmost) left edge byte, so
1056: add eax,ulLeftEdgeAdjust ; add a byte if the left edge is 2
1057: ; wide, except when the left dest byte
1058: ; is solid so the left edge is part of
1059: ; the whole bytes
1060: mov ulWholeBytesSrc,eax ;where the src whole bytes start
1061: add eax,culWholeBytesWidth ;point to the right edge
1062: mov ulRightEdgeSrc,eax ;where the right src edge starts,
1063: ; because the whole bytes and the right
1064: ; source edge share a byte, and we
1065: ; always point to the leftmost byte in
1066: ; the right source edge
1067:
1068: ; Branch to the appropriate bottom-to-top bank enumeration loop.
1069:
1070: mov eax,ulAdapterType ;adapter type indexes the dword jump table
1071: jmp BottomToTopLoopTable[eax*4]
1072:
1073:
1074: ;-----------------------------------------------------------------------;
1075: ; Set-up code for right-to-left, bottom-to-top copies.
1076: ;-----------------------------------------------------------------------;
1077: ; Stores the scan deltas, threading pointers and start offsets, then dispatches on ulAdapterType.
1078: align 4
1079: right_to_left_bottom_to_top:
1080:
1081: std ;we'll copy right to left (direction flag set)
1082:
1083: ; Byte-combining mask, in preparation for ORing and letting the VGA's rotator
1084: ; rotate, assuming the left-hand source byte is in AL and the right-hand source
1085: ; byte is in AH (always true except for left-to-right whole bytes).
1086:
1087: mov eax,ulCombineMask
1088: mov ulCombineMaskWhole,eax ;mask is used as is here (not inverted)
1089:
1090: mov edi,pdsurf
1091: mov eax,[edi].dsurf_lNextScan
1092: neg eax ;negated scan width, since we step upward
1093: mov ulNextScan,eax ;copy bottom to top
1094: add eax,culWholeBytesWidth ;offset from end of one whole byte scan
1095: mov ulWholeScanDelta,eax ; to start of next
1096: inc eax ;offset from end of one src whole byte
1097: mov ulWholeScanSrcDelta,eax ; scan to start of next, accounting for
1098: ; leading byte used to prime the
1099: ; rotation pipeline
1100:
1101: mov esi,ulLWRType ;3-bit flag field for left, whole, and
1102: ; right involvement in operation
1103: ;leave left-to-right field cleared, so
1104: ; we look up right-to-left entries
1105: mov eax,MasterThreadTable[esi*4]
1106: mov pCurrentThread,eax ;threading when no buffering is needed
1107: mov edx,ulAdapterType
1108: shl edx,ADAPTER_FIELD_SHIFT
1109: or esi,edx ;factor adapter type into the index
1110: mov eax,MasterThreadTableViaBuffer[esi*4]
1111: mov pCurrentThreadViaBuffer,eax ;threading when buffering is needed
1112:
1113: mov ulCurrentJustification,JustifyBottom ;copy bottom to top
1114:
1115: mov esi,prcldest
1116: mov edx,[esi].yTop
1117: mov ulLastDestScan,edx ;end at top of dest copy rect
1118: mov eax,[esi].yBottom
1119: dec eax ;rectangle definition is non-inclusive,
1120: ; so advance to first scan we'll copy
1121: sub edx,eax ;-(offset from rect top to bottom)
1122: push edx ;remember for use with source (the MUL below overwrites EDX)
1123: mov ulCurrentDestScan,eax ;start at bottom of dest copy rect
1124: mul [edi].dsurf_lNextScan ;offset in bitmap of bottom dest rect
1125: ; scan (first scan to which to copy); un-negated scan width is used
1126: mov edx,[esi].xLeft
1127: shr edx,3 ;byte X address
1128: add eax,edx ;offset in bitmap of first dest byte
1129: mov ulLeftEdgeDest,eax ;that's where the left dest edge is
1130: add eax,ulLeftEdgeAdjust ;the whole bytes start at the next
1131: ; byte, unless the left edge is a whole
1132: ; byte and is thus part of the whole
1133: ; bytes already
1134: add eax,culWholeBytesWidth ;point to the right edge
1135: mov ulRightEdgeDest,eax ;where the right dest edge starts
1136: dec eax ;back up to the last whole byte
1137: mov ulWholeBytesDest,eax ;where the whole dest bytes start
1138:
1139: mov esi,pptlsrc
1140: mov eax,[esi].ptl_y
1141: pop edx ;retrieve -(offset from top to bottom)
1142: sub eax,edx ;advance to bottom of source rect
1143: ; (inclusive; this is first scan from
1144: ; which to copy)
1145: mov ulCurrentSrcScan,eax ;start at bottom of source copy rect
1146: mul [edi].dsurf_lNextScan ;offset in bitmap of bottom source rect
1147: ; scan (first scan from which to copy)
1148: mov edx,[esi].ptl_x
1149: shr edx,3 ;byte X address
1150: add eax,edx ;offset in bitmap of first source byte
1151: mov ulLeftEdgeSrc,eax ;that's where the left src edge is
1152: add eax,ulLeftSrcWidthMinus1 ;the first whole byte includes the
1153: dec eax ; last (leftmost) left edge byte, so
1154: add eax,ulLeftEdgeAdjust ; add a byte if the left edge is 2
1155: ; wide, except when the left dest byte
1156: ; is solid so the left edge is part of
1157: ; the whole bytes
1158: add eax,culWholeBytesWidth ;point to the right edge of the whole
1159: ; src bytes, accounting for the extra
1160: ; source byte needed to prime the
1161: ; rotation pipeline
1162: mov ulWholeBytesSrc,eax ;where the src whole bytes start
1163: mov ulRightEdgeSrc,eax ;that's also where the right src edge
1164: ; starts, because the whole bytes and
1165: ; the right source edge share a byte,
1166: ; and we always point to the leftmost
1167: ; byte in the right source edge
1168:
1169: ; Branch to the appropriate bottom-to-top bank enumeration loop.
1170:
1171: mov eax,ulAdapterType ;adapter type indexes the dword jump table
1172: jmp BottomToTopLoopTable[eax*4]
1173:
1174:
1175: ;***********************************************************************;
1176: ;
1177: ; The following routines are the banking loops.
1178: ;
1179: ;***********************************************************************;
1180:
1181:
1182: ;-----------------------------------------------------------------------;
1183: ; Banking for 2 R/W and unbanked adapters, top to bottom.
1184: ;-----------------------------------------------------------------------;
1185: align 4
1186: top_to_bottom_2RW:
1187: ; Maps the source and dest banks as needed, then copies block by block until the dest rect bottom.
1188: ; We're going top to bottom. Map in the source and dest, top-justified.
1189:
1190: mov ebx,pdsurf
1191: mov edx,ulCurrentSrcScan
1192: cmp edx,[ebx].dsurf_rcl2WindowClipS.yTop ;is source top less than
1193: ; current source bank?
1194: jl short top_2RW_map_init_src_bank ;yes, map in proper bank
1195: cmp edx,[ebx].dsurf_rcl2WindowClipS.yBottom ;source top greater than
1196: ; current source bank?
1197: jl short top_2RW_init_src_bank_mapped
1198: ;no, proper bank already mapped
1199: top_2RW_map_init_src_bank:
1200:
1201: ; Map bank containing the top source scan line into source window.
1202: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1203:
1204: ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
1205: <ebx,edx,JustifyTop,MapSourceBank>
1206:
1207: top_2RW_init_src_bank_mapped:
1208:
1209: mov edx,ulCurrentDestScan
1210: cmp edx,[ebx].dsurf_rcl2WindowClipD.yTop ;is dest top less than
1211: ; current dest bank?
1212: jl short top_2RW_map_init_dest_bank ;yes, map in proper bank
1213: cmp edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest top greater than
1214: ; current dest bank?
1215: jl short top_2RW_init_dest_bank_mapped
1216: ;no, proper bank already mapped
1217: top_2RW_map_init_dest_bank:
1218:
1219: ; Map bank containing the top dest scan line into dest window.
1220: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1221:
1222: ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
1223: <ebx,edx,JustifyTop,MapDestBank>
1224:
1225: top_2RW_init_dest_bank_mapped:
1226:
1227: ; Bank-by-bank top-to-bottom copy loop.
1228:
1229: top_2RW_bank_loop:
1230:
1231: ; Decide how far we can go before we run out of bank or rectangle to copy.
1232:
1233: mov edx,ulLastDestScan
1234: cmp edx,[ebx].dsurf_rcl2WindowClipD.yBottom
1235: jl short @F ;copy rectangle bottom is in this bank
1236: mov edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest extends to end
1237: ; of bank, at least
1238: @@:
1239: sub edx,ulCurrentDestScan ;# of scans we can and want to do in
1240: ; the dest bank
1241: mov eax,[ebx].dsurf_rcl2WindowClipS.yBottom
1242: sub eax,ulCurrentSrcScan ;# of scans we can do in the src bank
1243:
1244: cmp edx,eax ;take the smaller of the two counts
1245: jb short @F ;source bank isn't limiting
1246: mov edx,eax ;source bank is limiting
1247: @@:
1248: mov ulBlockHeight,edx ;# of scans we'll do in this bank
1249:
1250: ; We're ready to copy this block.
1251:
1252: THREAD_AND_START
1253:
1254: ; Any more scans to copy?
1255:
1256: mov eax,ulCurrentDestScan
1257: mov esi,ulBlockHeight
1258: add eax,esi ;we've copied to dest up to here
1259: cmp ulLastDestScan,eax ;are we at the dest rect bottom?
1260: jz short top_2RW_done ;yes, we're done
1261: mov ulCurrentDestScan,eax
1262:
1263: ; Now advance either or both banks, as needed.
1264:
1265: mov ebx,pdsurf ;reload EBX with pdsurf for the bank tests below
1266: cmp eax,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest scan greater than
1267: ; current dest bank?
1268: jl short top_2RW_dest_bank_mapped ;no, proper bank still mapped
1269:
1270: ; Map bank containing the current dest scan line into dest window.
1271: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1272:
1273: ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
1274: <ebx,eax,JustifyTop,MapDestBank>
1275:
1276: top_2RW_dest_bank_mapped:
1277:
1278: add esi,ulCurrentSrcScan ;we've copied from source up to here (ESI held the block height)
1279: mov ulCurrentSrcScan,esi
1280:
1281: cmp esi,[ebx].dsurf_rcl2WindowClipS.yBottom ;src scan greater than
1282: ; current src bank?
1283: jl short top_2RW_src_bank_mapped ;no, proper bank still mapped
1284:
1285: ; Map bank containing the current source scan line into source window.
1286: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1287:
1288: ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
1289: <ebx,esi,JustifyTop,MapSourceBank>
1290:
1291: top_2RW_src_bank_mapped:
1292:
1293: jmp top_2RW_bank_loop ;copy the next block
1294:
1295: top_2RW_done:
1296: PLAIN_RET
1297:
1298:
1299: ;-----------------------------------------------------------------------;
1300: ; Banking for 2 R/W and unbanked adapters, bottom to top.
1301: ;-----------------------------------------------------------------------;
1302: align 4
1303: bottom_to_top_2RW:
1304: ; Maps the source and dest banks as needed, then copies block by block until past the dest rect top.
1305: ; We're going bottom to top. Map in the source and dest, bottom-justified.
1306:
1307: mov ebx,pdsurf
1308: mov edx,ulCurrentSrcScan
1309: cmp edx,[ebx].dsurf_rcl2WindowClipS.yTop ;is source bottom less than
1310: ; current source bank?
1311: jl short bot_2RW_map_init_src_bank ;yes, map in proper bank
1312: cmp edx,[ebx].dsurf_rcl2WindowClipS.yBottom ;source bottom greater
1313: ; than current src bank?
1314: jl short bot_2RW_init_src_bank_mapped
1315: ;no, proper bank already mapped
1316: bot_2RW_map_init_src_bank:
1317:
1318: ; Map bank containing the bottom source scan line into source window.
1319: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1320:
1321: ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
1322: <ebx,edx,JustifyBottom,MapSourceBank>
1323:
1324: bot_2RW_init_src_bank_mapped:
1325:
1326: mov edx,ulCurrentDestScan
1327: cmp edx,[ebx].dsurf_rcl2WindowClipD.yTop ;is dest bottom less than
1328: ; current dest bank?
1329: jl short bot_2RW_map_init_dest_bank ;yes, map in proper bank
1330: cmp edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest bottom greater
1331: ; than current dst bank?
1332: jl short bot_2RW_init_dest_bank_mapped
1333: ;no, proper bank already mapped
1334: bot_2RW_map_init_dest_bank:
1335:
1336: ; Map bank containing the bottom dest scan line into dest window.
1337: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1338:
1339: ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
1340: <ebx,edx,JustifyBottom,MapDestBank>
1341:
1342: bot_2RW_init_dest_bank_mapped:
1343:
1344: ; Bank-by-bank bottom-to-top copy loop.
1345:
1346: bot_2RW_bank_loop:
1347:
1348: ; Decide how far we can go before we run out of bank or rectangle to copy.
1349:
1350: mov edx,ulLastDestScan
1351: cmp edx,[ebx].dsurf_rcl2WindowClipD.yTop
1352: jg short @F ;copy rectangle top is in this bank
1353: mov edx,[ebx].dsurf_rcl2WindowClipD.yTop ;dest extends to end
1354: ; of bank, at least
1355: @@:
1356: neg edx ;EDX = current dest scan - limit + 1 after the next two lines
1357: add edx,ulCurrentDestScan ;# of scans we can and want to do in
1358: inc edx ; the dest bank
1359:
1360: mov eax,ulCurrentSrcScan
1361: sub eax,[ebx].dsurf_rcl2WindowClipS.yTop
1362: inc eax ;# of scans we can do in the src bank
1363:
1364: cmp edx,eax ;take the smaller of the two counts
1365: jb short @F ;source bank isn't limiting
1366: mov edx,eax ;source bank is limiting
1367: @@:
1368: mov ulBlockHeight,edx ;# of scans we'll do in this bank
1369:
1370: ; We're ready to copy this block.
1371:
1372: THREAD_AND_START
1373:
1374: ; Any more scans to copy?
1375:
1376: mov eax,ulCurrentDestScan
1377: mov esi,ulBlockHeight
1378: sub eax,esi ;we've copied to dest up to here
1379: cmp ulLastDestScan,eax ;are we past the dest rect top?
1380: jg short bot_2RW_done ;yes, we're done
1381: mov ulCurrentDestScan,eax
1382:
1383: ; Now advance either or both banks, as needed.
1384:
1385: mov ebx,pdsurf ;reload EBX with pdsurf for the bank tests below
1386: cmp eax,[ebx].dsurf_rcl2WindowClipD.yTop ;dest scan less than
1387: ; current dest bank?
1388: jge short bot_2RW_dest_bank_mapped ;no, proper bank still mapped
1389:
1390: ; Map bank containing the current dest scan line into dest window.
1391: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1392:
1393: ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
1394: <ebx,eax,JustifyBottom,MapDestBank>
1395:
1396: bot_2RW_dest_bank_mapped:
1397:
1398: mov eax,ulCurrentSrcScan
1399: sub eax,esi ;we've copied from source up to here (ESI holds the block height)
1400: mov ulCurrentSrcScan,eax
1401:
1402: cmp eax,[ebx].dsurf_rcl2WindowClipS.yTop ;src scan less than
1403: ; current src bank?
1404: jge short bot_2RW_src_bank_mapped ;no, proper bank still mapped
1405:
1406: ; Map bank containing the current source scan line into source window.
1407: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1408:
1409: ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
1410: <ebx,eax,JustifyBottom,MapSourceBank>
1411:
1412: bot_2RW_src_bank_mapped:
1413:
1414: jmp bot_2RW_bank_loop ;copy the next block
1415:
1416: bot_2RW_done:
1417: PLAIN_RET
1418:
1419:
1420: ;-----------------------------------------------------------------------;
1421: ; Banking for 1R/1W adapters, top to bottom.
1422: ;-----------------------------------------------------------------------;
1423: align 4
1424: top_to_bottom_1R1W:
1425: ; Like the 2 R/W loop, but picks the via-buffer threading when source and dest banks differ.
1426: ; We're going top to bottom. Map in the source and dest, top-justified.
1427:
1428: mov ebx,pdsurf
1429: mov edx,ulCurrentSrcScan
1430: cmp edx,[ebx].dsurf_rcl2WindowClipS.yTop ;is source top less than
1431: ; current source bank?
1432: jl short top_1R1W_map_init_src_bank ;yes, map in proper bank
1433: cmp edx,[ebx].dsurf_rcl2WindowClipS.yBottom ;source top greater than
1434: ; current source bank?
1435: jl short top_1R1W_init_src_bank_mapped
1436: ;no, proper bank already mapped
1437: top_1R1W_map_init_src_bank:
1438:
1439: ; Map bank containing the top source scan line into source window.
1440: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1441:
1442: ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
1443: <ebx,edx,JustifyTop,MapSourceBank>
1444:
1445: top_1R1W_init_src_bank_mapped:
1446:
1447: mov edx,ulCurrentDestScan
1448: cmp edx,[ebx].dsurf_rcl2WindowClipD.yTop ;is dest top less than
1449: ; current dest bank?
1450: jl short top_1R1W_map_init_dest_bank ;yes, map in proper bank
1451: cmp edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest top greater than
1452: ; current dest bank?
1453: jl short top_1R1W_init_dest_bank_mapped
1454: ;no, proper bank already mapped
1455: top_1R1W_map_init_dest_bank:
1456:
1457: ; Map bank containing the top dest scan line into dest window.
1458: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1459:
1460: ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
1461: <ebx,edx,JustifyTop,MapDestBank>
1462:
1463: top_1R1W_init_dest_bank_mapped:
1464:
1465: ; Bank-by-bank top-to-bottom copy loop.
1466:
1467: top_1R1W_bank_loop:
1468:
1469: ; Decide how far we can go before we run out of bank or rectangle to copy.
1470:
1471: mov edx,ulLastDestScan
1472: cmp edx,[ebx].dsurf_rcl2WindowClipD.yBottom
1473: jl short @F ;copy rectangle bottom is in this bank
1474: mov edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest extends to end
1475: ; of bank, at least
1476: @@:
1477: sub edx,ulCurrentDestScan ;# of scans we can and want to do in
1478: ; the dest bank
1479: mov eax,[ebx].dsurf_rcl2WindowClipS.yBottom
1480: sub eax,ulCurrentSrcScan ;# of scans we can do in the src bank
1481:
1482: cmp edx,eax ;take the smaller of the two counts
1483: jb short @F ;source bank isn't limiting
1484: mov edx,eax ;source bank is limiting
1485: @@:
1486: mov ulBlockHeight,edx ;# of scans we'll do in this bank
1487:
1488: ; We're ready to copy this block.
1489: ; Select different threading, depending on whether the source and destination
1490: ; are currently in the same bank; we can do edges faster if they are.
1491:
1492: mov eax,[ebx].dsurf_ulWindowBank ;compare the two dword entries of
1493: cmp eax,[ebx].dsurf_ulWindowBank[4] ; dsurf_ulWindowBank
1494: jz short top_1R1W_copy_same_bank
1495:
1496: ; Source and dest are currently in different banks, must go through temp buffer.
1497:
1498: THREAD_AND_START pCurrentThreadViaBuffer,top_1R1W_check_more_scans
1499:
1500: ; Source and dest are currently in the same bank.
1501:
1502: align 4
1503: top_1R1W_copy_same_bank:
1504: THREAD_AND_START
1505:
1506: ; Any more scans to copy?
1507:
1508: top_1R1W_check_more_scans:
1509:
1510: mov eax,ulCurrentDestScan
1511: mov esi,ulBlockHeight
1512: add eax,esi ;we've copied to dest up to here
1513: cmp ulLastDestScan,eax ;are we at the dest rect bottom?
1514: jz short top_1R1W_done ;yes, we're done
1515: mov ulCurrentDestScan,eax
1516:
1517: ; Now advance either or both banks, as needed.
1518:
1519: mov ebx,pdsurf ;reload EBX with pdsurf for the bank tests below
1520: cmp eax,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest scan greater than
1521: ; current dest bank?
1522: jl short top_1R1W_dest_bank_mapped ;no, proper bank still mapped
1523:
1524: ; Map bank containing the current dest scan line into dest window.
1525: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1526:
1527: ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
1528: <ebx,eax,JustifyTop,MapDestBank>
1529:
1530: top_1R1W_dest_bank_mapped:
1531:
1532: add esi,ulCurrentSrcScan ;we've copied from source up to here (ESI held the block height)
1533: mov ulCurrentSrcScan,esi
1534:
1535: cmp esi,[ebx].dsurf_rcl2WindowClipS.yBottom ;src scan greater than
1536: ; current src bank?
1537: jl short top_1R1W_src_bank_mapped ;no, proper bank still mapped
1538:
1539: ; Map bank containing the current source scan line into source window.
1540: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1541:
1542: ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
1543: <ebx,esi,JustifyTop,MapSourceBank>
1544:
1545: top_1R1W_src_bank_mapped:
1546:
1547: jmp top_1R1W_bank_loop ;copy the next block
1548:
1549: top_1R1W_done:
1550: PLAIN_RET
1551:
1552:
1553: ;-----------------------------------------------------------------------;
1554: ; Banking for 1R/1W adapters, bottom to top.
1555: ;-----------------------------------------------------------------------;
1556: align 4
1557: bottom_to_top_1R1W:
1558: ; Like the 2 R/W loop, but picks the via-buffer threading when source and dest banks differ.
1559: ; We're going bottom to top. Map in the source and dest, bottom-justified.
1560:
1561: mov ebx,pdsurf
1562: mov edx,ulCurrentSrcScan
1563: cmp edx,[ebx].dsurf_rcl2WindowClipS.yTop ;is source bottom less than
1564: ; current source bank?
1565: jl short bot_1R1W_map_init_src_bank ;yes, map in proper bank
1566: cmp edx,[ebx].dsurf_rcl2WindowClipS.yBottom ;source bottom greater
1567: ; than current src bank?
1568: jl short bot_1R1W_init_src_bank_mapped
1569: ;no, proper bank already mapped
1570: bot_1R1W_map_init_src_bank:
1571:
1572: ; Map bank containing the bottom source scan line into source window.
1573: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1574:
1575: ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
1576: <ebx,edx,JustifyBottom,MapSourceBank>
1577:
1578: bot_1R1W_init_src_bank_mapped:
1579:
1580: mov edx,ulCurrentDestScan
1581: cmp edx,[ebx].dsurf_rcl2WindowClipD.yTop ;is dest bottom less than
1582: ; current dest bank?
1583: jl short bot_1R1W_map_init_dest_bank ;yes, map in proper bank
1584: cmp edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest bottom greater
1585: ; than current dst bank?
1586: jl short bot_1R1W_init_dest_bank_mapped
1587: ;no, proper bank already mapped
1588: bot_1R1W_map_init_dest_bank:
1589:
1590: ; Map bank containing the bottom dest scan line into dest window.
1591: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1592:
1593: ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
1594: <ebx,edx,JustifyBottom,MapDestBank>
1595:
1596: bot_1R1W_init_dest_bank_mapped:
1597:
1598: ; Bank-by-bank bottom-to-top copy loop.
1599:
1600: bot_1R1W_bank_loop:
1601:
1602: ; Decide how far we can go before we run out of bank or rectangle to copy.
1603:
1604: mov edx,ulLastDestScan
1605: cmp edx,[ebx].dsurf_rcl2WindowClipD.yTop
1606: jg short @F ;copy rectangle top is in this bank
1607: mov edx,[ebx].dsurf_rcl2WindowClipD.yTop ;dest extends to end
1608: ; of bank, at least
1609: @@:
1610: neg edx ;EDX = current dest scan - limit + 1 after the next two lines
1611: add edx,ulCurrentDestScan ;# of scans we can and want to do in
1612: inc edx ; the dest bank
1613:
1614: mov eax,ulCurrentSrcScan
1615: sub eax,[ebx].dsurf_rcl2WindowClipS.yTop
1616: inc eax ;# of scans we can do in the src bank
1617:
1618: cmp edx,eax ;take the smaller of the two counts
1619: jb short @F ;source bank isn't limiting
1620: mov edx,eax ;source bank is limiting
1621: @@:
1622: mov ulBlockHeight,edx ;# of scans we'll do in this bank
1623:
1624: ; We're ready to copy this block.
1625: ; Select different threading, depending on whether the source and destination
1626: ; are currently in the same bank; we can do edges faster if they are.
1627: ; The full dword bank numbers are compared, as in top_to_bottom_1R1W.
1628: mov eax,[ebx].dsurf_ulWindowBank ;a low-byte-only compare could treat two
1629: cmp eax,[ebx].dsurf_ulWindowBank[4] ; different banks as the same bank
1630: jz short bot_1R1W_copy_same_bank
1631:
1632: ; Source and dest are currently in different banks, must go through temp buffer.
1633:
1634: THREAD_AND_START pCurrentThreadViaBuffer,bot_1R1W_check_more_scans
1635:
1636: ; Source and dest are currently in the same bank.
1637:
1638: align 4
1639: bot_1R1W_copy_same_bank:
1640: THREAD_AND_START
1641:
1642: ; Any more scans to copy?
1643:
1644: align 4
1645: bot_1R1W_check_more_scans:
1646:
1647: mov eax,ulCurrentDestScan
1648: mov esi,ulBlockHeight
1649: sub eax,esi ;we've copied to dest up to here
1650: cmp ulLastDestScan,eax ;are we past the dest rect top?
1651: jg short bot_1R1W_done ;yes, we're done
1652: mov ulCurrentDestScan,eax
1653:
1654: ; Now advance either or both banks, as needed.
1655:
1656: mov ebx,pdsurf ;reload EBX with pdsurf for the bank tests below
1657: cmp eax,[ebx].dsurf_rcl2WindowClipD.yTop ;dest scan less than
1658: ; current dest bank?
1659: jge short bot_1R1W_dest_bank_mapped ;no, proper bank still mapped
1660:
1661: ; Map bank containing the current dest scan line into dest window.
1662: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1663:
1664: ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
1665: <ebx,eax,JustifyBottom,MapDestBank>
1666:
1667: bot_1R1W_dest_bank_mapped:
1668:
1669: mov eax,ulCurrentSrcScan
1670: sub eax,esi ;we've copied from source up to here (ESI holds the block height)
1671: mov ulCurrentSrcScan,eax
1672:
1673: cmp eax,[ebx].dsurf_rcl2WindowClipS.yTop ;src scan less than
1674: ; current src bank?
1675: jge short bot_1R1W_src_bank_mapped ;no, proper bank still mapped
1676:
1677: ; Map bank containing the current source scan line into source window.
1678: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1679:
1680: ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
1681: <ebx,eax,JustifyBottom,MapSourceBank>
1682:
1683: bot_1R1W_src_bank_mapped:
1684:
1685: jmp bot_1R1W_bank_loop ;copy the next block
1686:
1687: bot_1R1W_done:
1688: PLAIN_RET
1689:
1690:
1691: ;-----------------------------------------------------------------------;
1692: ; Banking for 1 R/W adapters, top to bottom.
1693: ;-----------------------------------------------------------------------;
1694: align 4
1695: top_to_bottom_1RW:
1696: ; Copies scans ulCurrentDestScan up to (but not including) ulLastDestScan in
1697: ; blocks, each limited by the bottom of the dest bank and of the source bank.
1698: ; We're going top to bottom, so first map in the dest, top-justified.
1699: mov ebx,pdsurf
1700: mov esi,ulCurrentDestScan
1701: cmp esi,[ebx].dsurf_rcl1WindowClip.yTop ;is dest top above the
1702: ; current bank?
1703: jl short top_1RW_map_init_dest_bank ;yes, map in proper bank
1704: cmp esi,[ebx].dsurf_rcl1WindowClip.yBottom ;is dest top at or past
1705: ; the current bank's bottom?
1706: jl short top_1RW_init_dest_bank_mapped
1707: ;no, proper bank already mapped
1708: top_1RW_map_init_dest_bank:
1709:
1710: ; Map bank containing the top dest scan line into the window.
1711: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1712:
1713: ptrCall <dword ptr [ebx].dsurf_pfnBankControl>,<ebx,esi,JustifyTop>
1714:
1715: top_1RW_init_dest_bank_mapped:
1716:
1717: ; Bank-by-bank top-to-bottom copy loop.
1718: ; On entry to each pass: EBX = pdsurf, ESI = current dest scan, dest bank mapped.
1719: top_1RW_bank_loop:
1720:
1721: ; Decide how far we can go before we run out of bank or rectangle to copy.
1722:
1723: mov edi,ulLastDestScan
1724: cmp edi,[ebx].dsurf_rcl1WindowClip.yBottom
1725: jl short @F ;copy rectangle bottom is in this bank
1726: mov edi,[ebx].dsurf_rcl1WindowClip.yBottom ;dest extends to end
1727: ; of bank, at least
1728: @@:
1729: sub edi,esi ;# of scans we can and want to do in the dest bank
1730:
1731: ; Now make sure source is mapped in. This is the condition the copying routines
1732: ; expect, and we need to figure out how far we can go in the source.
1733:
1734: sub edx,edx ;assume source and dest are in the same
1735: ; bank (EDX = 0)
1736: mov esi,ulCurrentSrcScan
1737: cmp esi,[ebx].dsurf_rcl1WindowClip.yTop ;is src scan above the
1738: ; current bank?
1739: jl short top_1RW_map_src_Bank ;yes, must map in
1740: cmp esi,[ebx].dsurf_rcl1WindowClip.yBottom ;is src scan at or past
1741: ; the current bank's bottom?
1742: jl short top_1RW_src_bank_mapped ;no, proper bank still mapped
1743:
1744: top_1RW_map_src_Bank:
1745:
1746: ; Map bank containing the current source scan line into the window.
1747: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1748:
1749: ptrCall <dword ptr [ebx].dsurf_pfnBankControl>,<ebx,esi,JustifyTop>
1750:
1751: mov edx,1 ;mark that source and dest are not in
1752: ; the same bank (set after the call)
1753: top_1RW_src_bank_mapped:
1754:
1755: mov eax,[ebx].dsurf_rcl1WindowClip.yBottom
1756: sub eax,esi ;# of scans we can do in the src bank
1757:
1758: cmp edi,eax ;block height = smaller of dest and src limits
1759: jb short @F ;source bank isn't limiting
1760: mov edi,eax ;source bank is limiting
1761: @@:
1762: mov ulBlockHeight,edi ;# of scans we'll do in this bank
1763:
1764: ; We're ready to copy this block.
1765: ; Select different threading, depending on whether the source and destination
1766: ; are currently in the same bank; we can do edges faster if they are.
1767:
1768: and edx,edx ;EDX = 0 if same bank, 1 if different banks
1769: jz short top_1RW_copy_same_bank
1770:
1771: ; Source and dest are currently in different banks, must go through temp buffer.
1772: ; NOTE(review): THREAD_AND_START is defined outside this section; presumably it runs the given thread and continues at the given label - confirm.
1773: THREAD_AND_START pCurrentThreadViaBuffer,top_1RW_check_more_scans
1774:
1775: ; Source and dest are currently in the same bank.
1776: ; NOTE(review): with no arguments the macro presumably runs the default thread and falls through to the check below - confirm.
1777: align 4
1778: top_1RW_copy_same_bank:
1779: THREAD_AND_START
1780:
1781: ; Any more scans to copy?
1782:
1783: top_1RW_check_more_scans:
1784:
1785: mov esi,ulCurrentDestScan
1786: mov edi,ulBlockHeight
1787: add esi,edi ;we've copied to dest up to here
1788: cmp ulLastDestScan,esi ;are we at the dest rect bottom?
1789: jz short top_1RW_done ;yes, we're done
1790: mov ulCurrentDestScan,esi
1791:
1792: ; Now make sure the dest bank is mapped in.
1793:
1794: mov ebx,pdsurf
1795: cmp esi,[ebx].dsurf_rcl1WindowClip.yTop ;is dest scan above the
1796: ; current bank?
1797: jl short top_1RW_map_dest_bank ;yes, map in dest bank
1798: cmp esi,[ebx].dsurf_rcl1WindowClip.yBottom ;is dest scan at or past
1799: ; the current bank's bottom?
1800: jl short top_1RW_dest_bank_mapped ;no, proper bank mapped
1801:
1802: top_1RW_map_dest_bank:
1803:
1804: ; Map bank containing the current dest scan line into the window.
1805: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1806:
1807: ptrCall <dword ptr [ebx].dsurf_pfnBankControl>,<ebx,esi,JustifyTop>
1808:
1809: top_1RW_dest_bank_mapped:
1810:
1811: add ulCurrentSrcScan,edi ;we've copied from source up to here
1812: ; (EDI still = height of the block just copied)
1813: jmp top_1RW_bank_loop
1814:
1815: top_1RW_done:
1816: PLAIN_RET
1817:
1818:
1819: ;-----------------------------------------------------------------------;
1820: ; Banking for 1 R/W adapters, bottom to top.
1821: ;-----------------------------------------------------------------------;
1822: align 4
1823: bottom_to_top_1RW:
1824: ; Copies scans ulCurrentDestScan up through (and including) ulLastDestScan in
1825: ; blocks, each limited by the top of the dest bank and of the source bank.
1826: ; We're going bottom to top, so first map in the dest, bottom-justified.
1827: mov ebx,pdsurf
1828: mov esi,ulCurrentDestScan
1829: cmp esi,[ebx].dsurf_rcl1WindowClip.yTop ;is dest bottom above the
1830: ; current bank?
1831: jl short bot_1RW_map_init_dest_bank ;yes, map in proper bank
1832: cmp esi,[ebx].dsurf_rcl1WindowClip.yBottom ;is dest bottom at or past
1833: ; the current bank's bottom?
1834: jl short bot_1RW_init_dest_bank_mapped
1835: ;no, proper bank already mapped
1836: bot_1RW_map_init_dest_bank:
1837:
1838: ; Map bank containing the bottom dest scan line into the window.
1839: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1840:
1841: ptrCall <dword ptr [ebx].dsurf_pfnBankControl>,<ebx,esi,JustifyBottom>
1842:
1843: bot_1RW_init_dest_bank_mapped:
1844:
1845: ; Bank-by-bank bottom-to-top copy loop.
1846: ; On entry to each pass: EBX = pdsurf, ESI = current dest scan, dest bank mapped.
1847: bot_1RW_bank_loop:
1848:
1849: ; Decide how far we can go before we run out of bank or rectangle to copy.
1850:
1851: mov edi,ulLastDestScan
1852: cmp edi,[ebx].dsurf_rcl1WindowClip.yTop
1853: jg short @F ;copy rectangle top is in this bank
1854: mov edi,[ebx].dsurf_rcl1WindowClip.yTop ;dest extends to top
1855: ; of bank, at least
1856: @@:
1857: neg edi ;EDI = current dest scan - topmost scan + 1
1858: add edi,esi ;# of scans we can and want to do in
1859: inc edi ; the dest bank
1860:
1861: ; Now make sure source is mapped in. This is the condition the copying routines
1862: ; expect, and we need to figure out how far we can go in the source.
1863:
1864: sub edx,edx ;assume source and dest are in the same
1865: ; bank (EDX = 0)
1866: mov esi,ulCurrentSrcScan
1867: cmp esi,[ebx].dsurf_rcl1WindowClip.yTop ;is src scan above the
1868: ; current bank?
1869: jl short bot_1RW_map_src_Bank ;yes, must map in
1870: cmp esi,[ebx].dsurf_rcl1WindowClip.yBottom ;is src scan at or past
1871: ; the current bank's bottom?
1872: jl short bot_1RW_src_bank_mapped ;no, proper bank still mapped
1873:
1874: bot_1RW_map_src_Bank:
1875:
1876: ; Map bank containing the current source scan line into the window.
1877: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1878:
1879: ptrCall <dword ptr [ebx].dsurf_pfnBankControl>,<ebx,esi,JustifyBottom>
1880:
1881: mov edx,1 ;mark that source and dest are not in
1882: ; the same bank (set after the call)
1883: bot_1RW_src_bank_mapped:
1884:
1885: sub esi,[ebx].dsurf_rcl1WindowClip.yTop
1886: inc esi ;# of scans we can do in the src bank
1887:
1888: cmp edi,esi ;block height = smaller of dest and src limits
1889: jb short @F ;source bank isn't limiting
1890: mov edi,esi ;source bank is limiting
1891: @@:
1892: mov ulBlockHeight,edi ;# of scans we'll do in this bank
1893:
1894: ; We're ready to copy this block.
1895: ; Select different threading, depending on whether the source and destination
1896: ; are currently in the same bank; we can copy much faster if they are.
1897:
1898: and edx,edx ;EDX = 0 if same bank, 1 if different banks
1899: jz short bot_1RW_copy_same_bank
1900:
1901: ; Source and dest are currently in different banks, must go through temp buffer.
1902: ; NOTE(review): THREAD_AND_START is defined outside this section; presumably it runs the given thread and continues at the given label - confirm.
1903: THREAD_AND_START pCurrentThreadViaBuffer,bot_1RW_check_more_scans
1904:
1905: ; Source and dest are currently in the same bank.
1906: ; NOTE(review): with no arguments the macro presumably runs the default thread and falls through to the check below - confirm.
1907: align 4
1908: bot_1RW_copy_same_bank:
1909: THREAD_AND_START
1910:
1911: ; Any more scans to copy?
1912:
1913: align 4
1914: bot_1RW_check_more_scans:
1915:
1916: mov esi,ulCurrentDestScan
1917: mov edi,ulBlockHeight
1918: sub esi,edi ;we've copied to dest up to here
1919: cmp ulLastDestScan,esi ;are we past the dest rect top?
1920: jg short bot_1RW_done ;yes, we're done
1921: mov ulCurrentDestScan,esi
1922:
1923: ; Now make sure the dest bank is mapped in.
1924:
1925: mov ebx,pdsurf
1926: cmp esi,[ebx].dsurf_rcl1WindowClip.yTop ;is dest scan above the
1927: ; current bank?
1928: jl short bot_1RW_map_dest_bank ;yes, map in dest bank
1929: cmp esi,[ebx].dsurf_rcl1WindowClip.yBottom ;is dest scan at or past
1930: ; the current bank's bottom?
1931: jl short bot_1RW_dest_bank_mapped ;no, proper bank mapped
1932:
1933: bot_1RW_map_dest_bank:
1934:
1935: ; Map bank containing the current dest scan line into the window.
1936: ; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
1937:
1938: ptrCall <dword ptr [ebx].dsurf_pfnBankControl>,<ebx,esi,JustifyBottom>
1939:
1940: bot_1RW_dest_bank_mapped:
1941:
1942: sub ulCurrentSrcScan,edi ;we've copied from source up to here
1943: ; (EDI still = height of the block just copied)
1944: jmp bot_1RW_bank_loop
1945:
1946: bot_1RW_done:
1947: PLAIN_RET
1948:
1949:
1950: ;***********************************************************************;
1951: ;
1952: ; The following routines are the low-level copying routines. They know
1953: ; almost nothing about banks (the routines that copy through a temp
1954: ; buffer know how to switch banks after filling the temp buffer, but
1955: ; that's it). Banking should be taken care of at a higher level.
1956: ;
1957: ;***********************************************************************;
1958:
1959: ;-----------------------------------------------------------------------;
1960: ; Copies a block of solid bytes directly from the source to the
1961: ; destination, without using a temp buffer. We can't use the latches,
1962: ; though, because this is a rotated copy. Can only be used by 2 R/W or
1963: ; 1R/1W window banking, or by unbanked modes, or by 1 R/W adapters when
1964: ; the source and dest are in the same bank. 1 R/W adapters must go
1965: ; through an intermediate local buffer when the source and the destination
1966: ; aren't in the same bank.
1967: ;
1968: ; Input:
1969: ; Direction Flag set for desired direction of copy
1970: ; culWholeBytesWidth = # of bytes to copy across each scan line
1971: ; ulWholeScanDelta = distance to start of next dest scan from end of
1972: ; current
1973: ; ulWholeScanSrcDelta = distance to start of next source scan from end of
1974: ; current
1975: ; ulBlockHeight = # of scans to copy
1976: ; ulWholeBytesSrc = start source offset in bitmap
1977: ; ulWholeBytesDest = start dest offset in bitmap
1978: ; ulCombineMaskWhole = masking to be applied before ORing the two source
1979: ; bytes together, to keep only the data needed in preparation
1980: ; for the VGA rotator doing its stuff
1981: ;
1982: ; Output:
1983: ; Advances ulWholeBytesSrc and ulWholeBytesDest to scan after last
1984: ; scan processed
1985: ;-----------------------------------------------------------------------;
1986:
1987: align 4
1988: copy_whole_bytes:
1989:
1990: ; Calculate start source and dest addresses from bitmap start addresses and
1991: ; offsets within bitmap.
1992:
1993: mov ecx,pdsurf
1994: mov eax,ulWholeBytesSrc
1995: add eax,[ecx].dsurf_pvBitmapStart2WindowS
1996: mov pSrcAddr,eax
1997: mov eax,ulWholeBytesDest
1998: add eax,[ecx].dsurf_pvBitmapStart2WindowD
1999: mov pDestAddr,eax
2000:
2001: ; Set the bit mask to enable all bits.
2002:
2003: mov edx,VGA_BASE + GRAF_ADDR
2004: mov eax,(0ffh shl 8) + GRAF_BIT_MASK
2005: out dx,ax
2006:
2007: ; Leave GC Index pointing to the Read Map register.
2008:
2009: mov al,GRAF_READ_MAP
2010: out dx,al
2011:
2012: ; Set up the scan count and the unrolled-loop iteration count and entry point.
2013:
2014: mov eax,ulBlockHeight
2015: mov ulTempScanCount,eax
2016:
2017: mov ebx,culWholeBytesWidth
2018: SET_UP_UNROLL_VARS ebx,ecx,ebx,pfnCopyWholeRWEntry, \
2019: LOOP_UNROLL_SHIFT
2020: mov culTempCount,ebx ;remember # of unrolled loop iterations
2021: mov pTempEntry,ecx ;ditto for entry point
2022:
2023: copy_whole_scan_loop:
2024:
2025: mov cl,MM_C3 ;start by copying plane 3 (for Map Mask)
2026:
2027: copy_whole_plane_loop:
2028:
2029: ; Set Map Mask to enable writes to the plane we're copying. Only the data port
2030: ; is written, so the Sequencer Index is assumed to already point to the Map Mask.
2031: mov edx,VGA_BASE + SEQ_DATA
2032: mov al,cl
2033: out dx,al
2034:
2035: ; Set Read Map to enable reads from the plane we're copying.
2036:
2037: mov dl,GRAF_DATA
2038: shr al,1 ;map plane into ReadMask
2039: cmp al,100b ;set Carry if not C3 (plane 3)
2040: adc al,-1 ;sub 1 only if C3
2041: out dx,al
2042:
2043: ; Point to the start of the current scan in the source and the destination.
2044:
2045: mov esi,pSrcAddr ;source address of current scan
2046: mov edi,pDestAddr ;point to destination start
2047:
2048: lodsb ;prime the rotation pipeline
2049: mov ah,al ;for combining with the next byte
2050:
2051: mov ebx,culTempCount
2052: mov edx,ulCombineMaskWhole
2053: jmp pTempEntry ;enter the unrolled loop at the precalculated point
2054:
2055:
2056: ;-----------------------------------------------------------------------;
2057: ; Table of unrolled copy whole bytes (direct, no buffer) loop entry points.
2058: ;-----------------------------------------------------------------------;
2059:
2060: UNROLL_LOOP_ENTRY_TABLE pfnCopyWholeRWEntry, \
2061: WHOLE_RW, LOOP_UNROLL_COUNT
2062:
2063: ;-----------------------------------------------------------------------;
2064: ; Unrolled loop for copying whole bytes directly from source to dest.
2065: ;-----------------------------------------------------------------------;
2066:
2067: COPY_WHOLE_RW macro ENTRY_LABEL,ENTRY_INDEX
2068: &ENTRY_LABEL&ENTRY_INDEX&:
2069: lodsb ;get byte to copy
2070: mov ch,al ;set aside for next time
2071: and eax,edx ;mask the bytes in preparation for combining
2072: ; and rotating them
2073: or al,ah ;combine them
2074: stosb ;write the composite byte
2075: ; VGA rotates during write
2076: mov ah,ch ;prepare byte for combining next time
2077: endm ;-----------------------------------;
2078:
2079: ; AH = rotation pipeline-priming byte
2080: ; EDX = mask to preserve desired portions of AH and AL before combining
2081: ; ESI = source address to copy from
2082: ; EDI = target address to copy to
2083: ; Map Mask set to enable the desired plane for write
2084: ; Bit Mask set to enable all bits
2085: ; EBX = # of unrolled loop iterations; CL = plane's Map Mask; CH = scratch
2086: align 4
2087: copy_whole_loop:
2088: UNROLL_LOOP COPY_WHOLE_RW,WHOLE_RW,LOOP_UNROLL_COUNT
2089:
2090: dec ebx
2091: jnz copy_whole_loop
2092:
2093: ; Do next plane, if any.
2094:
2095: shr cl,1 ;advance to next plane
2096: jnz copy_whole_plane_loop
2097:
2098: ; Remember where we left off, for next scan.
2099:
2100: add edi,ulWholeScanDelta ;point to next dest scan
2101: mov pDestAddr,edi
2102: add esi,ulWholeScanSrcDelta ;point to next source scan
2103: mov pSrcAddr,esi
2104:
2105: ; Count down scan lines.
2106:
2107: dec ulTempScanCount
2108: jnz copy_whole_scan_loop
2109:
2110: ; Remember where we left off, for next time (converted back to bitmap offsets).
2111:
2112: mov ecx,pdsurf
2113: sub esi,[ecx].dsurf_pvBitmapStart2WindowS
2114: mov ulWholeBytesSrc,esi
2115: sub edi,[ecx].dsurf_pvBitmapStart2WindowD
2116: mov ulWholeBytesDest,edi
2117:
2118: PLAIN_RET
2119:
2120:
2121: ;-----------------------------------------------------------------------;
2122: ; Copies a block of solid bytes from the source to the destination via
2123: ; the temp buffer. This should only be used by 1 R/W adapters, and then
2124: ; only when the source and dest are in different banks.
2125: ;
2126: ; All relevant bytes are first copied from the source to a temp buffer that's
2127: ; an image of the source. Then, we copy each of the four planes for one scan
2128: ; line from the temp buffer to the screen before going on to the next scan
2129: ; line. See ALIGNBLT.ASM for comments about why this is done.
2130: ;
2131: ; Input:
2132: ; Direction Flag set for desired direction of copy
2133: ; culWholeBytesWidth = # of bytes to copy across each scan line
2134: ; ulWholeScanDelta = distance to start of next dest scan from end of current
2135: ; ulNextScan = width of a scan line
2136: ; ulBlockHeight = # of scans to copy
2137: ; ulWholeBytesSrc = start source offset in bitmap
2138: ; ulWholeBytesDest = start dest offset in bitmap
2139: ; ppTempPlane0 = pointer to pointer to plane 0 storage in temp buffer
2140: ; ppTempPlane3 = pointer to pointer to plane 3 storage in temp buffer
2141: ; ulCombineMaskWhole = masking to be applied before ORing the two source
2142: ; bytes together, to keep only the data needed in preparation
2143: ; for the VGA rotator doing its stuff
2144: ; Expects the source bank to be mapped in; source bank is mapped in on
2145: ; exit
2146: ; Also reads ulWholeScanSrcDelta, ulCurrentSrcScan, ulCurrentDestScan, and ulCurrentJustification
2147: ; Output:
2148: ; Advances ulWholeBytesSrc and ulWholeBytesDest to scan after last
2149: ; scan processed
2150: ;-----------------------------------------------------------------------;
2151:
2152: align 4
2153: copy_whole_bytes_via_buffer:
2154:
2155: ; Calculate start source address from bitmap start address and offset within
2156: ; bitmap.
2157:
2158: mov ecx,pdsurf
2159: mov eax,ulWholeBytesSrc
2160: add eax,[ecx].dsurf_pvBitmapStart
2161: mov pSrcAddr,eax
2162: sub eax,[ecx].dsurf_pvStart
2163: mov ulOffsetInBank,eax ;will come in handy because we treat the
2164: ; temp buffer as an image of the current
2165: ; bank
2166:
2167: ; First, copy all the bytes into the temporary buffer.
2168:
2169: ; Leave the GC Index pointing to the Read Map.
2170:
2171: mov edx,VGA_BASE + GRAF_ADDR
2172: mov al,GRAF_READ_MAP
2173: out dx,al
2174:
2175: mov eax,3 ;start by copying plane 3
2176: copy_whole_to_buffer_plane_loop:
2177: mov ebx,ulBlockHeight ;# of scans to copy
2178: mov esi,pSrcAddr ;source address in screen
2179: mov edi,ppTempPlane0
2180: mov edi,[edi+eax*4] ;pointer to current plane in temp buffer
2181: add edi,ulOffsetInBank ;dest for plane in temp buffer
2182:
2183: mov edx,VGA_BASE + GRAF_DATA
2184: out dx,al ;set Read Map to plane we're copying from.
2185:
2186: push eax ;remember plane index
2187: mov eax,ulWholeScanSrcDelta ;end-of-scan to next-scan distance, applied to source and temp buffer alike
2188: mov edx,culWholeBytesWidth ;# of bytes per scan
2189: inc edx ;always one more source byte than dest byte
2190: copy_whole_to_buffer_scan_loop:
2191: mov ecx,edx ;# of bytes per scan
2192: rep movsb ;copy the scan line to the temp buffer
2193: add esi,eax ;point to next source scan
2194: add edi,eax ;point to next dest scan
2195:
2196: dec ebx ;count down scan lines
2197: jnz copy_whole_to_buffer_scan_loop
2198:
2199: pop eax ;get back plane index
2200: dec eax ;count down planes
2201: jns copy_whole_to_buffer_plane_loop ;loop until the index goes below 0
2202:
2203: ; Remember where we left off, for next time.
2204:
2205: mov ebx,pdsurf
2206: sub esi,[ebx].dsurf_pvBitmapStart
2207: mov ulWholeBytesSrc,esi
2208:
2209:
2210: ; Now copy the temp buffer to the screen.
2211:
2212: ; Map in the destination bank, so we can read/write to it and let the Bit Mask
2213: ; work.
2214:
2215: ptrCall <dword ptr [ebx].dsurf_pfnBankControl>, \
2216: <ebx,ulCurrentDestScan,ulCurrentJustification>
2217:
2218: ; Calculate dest start address (if this is a 1 R/W adapter, we had to wait
2219: ; until now to calculate this, because the dest bank wasn't mapped earlier).
2220:
2221: mov eax,ulWholeBytesDest
2222: add eax,[ebx].dsurf_pvBitmapStart
2223: mov pDestAddr,eax
2224:
2225: ; Set the bit mask to enable all bits.
2226:
2227: mov edx,VGA_BASE + GRAF_ADDR
2228: mov eax,(0ffh shl 8) + GRAF_BIT_MASK
2229: out dx,ax
2230:
2231: ; Set up the scan count and the unrolled-loop iteration count and entry point.
2232:
2233: mov eax,ulBlockHeight
2234: mov ulTempScanCount,eax
2235:
2236: mov ebx,culWholeBytesWidth
2237: SET_UP_UNROLL_VARS ebx,ecx,ebx,pfnCopyWholeFromBufferEntry, \
2238: LOOP_UNROLL_SHIFT
2239: mov culTempCount,ebx ;remember # of unrolled loop iterations
2240: mov pTempEntry,ecx ;ditto for entry point
2241:
2242: copy_whole_from_buffer_scan_loop:
2243:
2244: mov ebx,ppTempPlane3 ;point to plane 3's temp buffer offset
2245: mov cl,MM_C3 ;start by copying plane 3
2246:
2247: copy_whole_from_buffer_plane_loop:
2248:
2249: ; Set Map Mask to enable writes to the plane we're copying. Only the data port
2250: ; is written, so the Sequencer Index is assumed to already point to the Map Mask.
2251: mov edx,VGA_BASE + SEQ_DATA
2252: mov al,cl
2253: out dx,al
2254:
2255: ; Select the corresponding plane from the temp buffer.
2256:
2257: mov esi,[ebx] ;point to plane start in temp buffer
2258: sub ebx,4 ;point to next temp buffer plane ptr
2259: push ebx ;preserve pointer to plane pointer
2260: ; (EBX is reused below as the loop count)
2261: add esi,ulOffsetInBank ;point to current scan start in temp buffer
2262: mov edi,pDestAddr ;point to destination start
2263:
2264: lodsb ;prime the rotation pipeline
2265: mov ah,al ;for combining with the next byte
2266:
2267: mov ebx,culTempCount
2268: mov edx,ulCombineMaskWhole
2269: jmp pTempEntry ;enter the unrolled loop at the precalculated point
2270:
2271:
2272: ;-----------------------------------------------------------------------;
2273: ; Table of unrolled copy whole bytes from buffer loop entry points.
2274: ;-----------------------------------------------------------------------;
2275:
2276: UNROLL_LOOP_ENTRY_TABLE pfnCopyWholeFromBufferEntry, \
2277: WHOLE_FROM_BUFFER, LOOP_UNROLL_COUNT
2278:
2279: ;-----------------------------------------------------------------------;
2280: ; Unrolled loop for copying whole bytes from the buffer.
2281: ;-----------------------------------------------------------------------;
2282:
2283: COPY_WHOLE_FROM_BUFFER macro ENTRY_LABEL,ENTRY_INDEX
2284: &ENTRY_LABEL&ENTRY_INDEX&:
2285: lodsb ;get byte to copy
2286: mov ch,al ;set aside for next time
2287: and eax,edx ;mask the bytes in preparation for combining
2288: ; and rotating them
2289: or al,ah ;combine them
2290: stosb ;write the composite byte
2291: ; VGA rotates during write
2292: mov ah,ch ;prepare byte for combining next time
2293: endm ;-----------------------------------;
2294:
2295: ; AH = rotation pipeline-priming byte
2296: ; EDX = mask to preserve desired portions of AH and AL before combining
2297: ; ESI = source address to copy from
2298: ; EDI = target address to copy to
2299: ; Map Mask set to enable the desired plane for write
2300: ; Bit Mask set to enable all bits
2301: ; EBX = # of unrolled loop iterations; CL = plane's Map Mask; CH = scratch
2302: align 4
2303: copy_whole_from_buffer_loop:
2304: UNROLL_LOOP COPY_WHOLE_FROM_BUFFER,WHOLE_FROM_BUFFER,LOOP_UNROLL_COUNT
2305:
2306: dec ebx
2307: jnz copy_whole_from_buffer_loop
2308:
2309: ; Do next plane, if any.
2310:
2311: pop ebx ;retrieve pointer to plane pointer
2312: shr cl,1 ;advance to next plane
2313: jnz copy_whole_from_buffer_plane_loop
2314:
2315: ; Remember where we left off, for next scan.
2316:
2317: add edi,ulWholeScanDelta ;point to next dest scan
2318: mov pDestAddr,edi
2319: mov eax,ulNextScan
2320: add ulOffsetInBank,eax ;next scan's start in temp buffer,
2321: ; relative to start of plane's storage
2322:
2323: ; Count down scan lines.
2324:
2325: dec ulTempScanCount
2326: jnz copy_whole_from_buffer_scan_loop
2327:
2328: ; Remember where we left off, for next time (converted back to a bitmap offset).
2329:
2330: mov ebx,pdsurf
2331: sub edi,[ebx].dsurf_pvBitmapStart
2332: mov ulWholeBytesDest,edi
2333:
2334: ; Put back the original source bank.
2335:
2336: ptrCall <dword ptr [ebx].dsurf_pfnBankControl>, \
2337: <ebx,ulCurrentSrcScan,ulCurrentJustification>
2338:
2339: PLAIN_RET
2340:
2341:
2342: ;-----------------------------------------------------------------------;
2343: ; Copies a strip of left edge bytes from the source to the destination,
2344: ; assuming the source and the destination are both readable and
2345: ; writable. Can only be used by 2 R/W window banking, or by unbanked
2346: ; modes. 1 R/W and 1R/1W adapters must go through an intermediate local
2347: ; buffer when the source and dest are in different banks. Processes up to
2348: ; EDGE_CHUNK_SIZE bytes in each plane at a pop; more bytes might cause
2349: ; flicker.
2350: ;
2351: ; Input:
2352: ; ulNextScan = width of scan, in bytes
2353: ; ulBlockHeight = # of scans to copy
2354: ; ulLeftEdgeSrc = start source offset in bitmap
2355: ; ulLeftEdgeDest = start dest offset in bitmap
2356: ; ulLeftSrcWidthMinus1 = width of left source edge minus 1 (0 or 1)
2357: ; jLeftMask = left edge clip mask
2358: ;
2359: ; Output:
2360: ; Advances ulLeftEdgeSrc and ulLeftEdgeDest to scan after last
2361: ; scan processed
2362: ;-----------------------------------------------------------------------;
2363:
2364: align 4
2365: copy_left_edge:
2366:
2367: ; Calculate start source and dest addresses from bitmap start addresses and
2368: ; offsets within bitmap.
2369:
2370: mov ecx,pdsurf
2371: mov esi,ulLeftEdgeSrc
2372: add esi,[ecx].dsurf_pvBitmapStart2WindowS
2373: mov edi,ulLeftEdgeDest
2374: add edi,[ecx].dsurf_pvBitmapStart2WindowD
2375:
2376: ; Copy the edge. The copier is passed AH = clip mask, ESI = source, EDI = dest.
2377:
2378: mov ah,byte ptr jLeftMask ;clip mask for this edge
2379: mov ebx,ulLeftSrcWidthMinus1
2380: call copy_edge_table[ebx*4] ;pick the copier by source edge width - 1
2381:
2382: ; Remember where we left off, for next time (converted back to bitmap offsets).
2383:
2384: mov ecx,pdsurf
2385: sub esi,[ecx].dsurf_pvBitmapStart2WindowS
2386: mov ulLeftEdgeSrc,esi
2387: sub edi,[ecx].dsurf_pvBitmapStart2WindowD
2388: mov ulLeftEdgeDest,edi
2389:
2390: PLAIN_RET
2391:
2392:
2393: ;-----------------------------------------------------------------------;
2394: ; Copies a strip of right edge bytes from the source to the destination,
2395: ; assuming the source and the destination are both readable and
2396: ; writable. Can only be used by 2 R/W window banking, or by unbanked
2397: ; modes. 1 R/W and 1R/1W adapters must go through an intermediate local
2398: ; buffer when the source and dest are in different banks. Processes up to
2399: ; EDGE_CHUNK_SIZE bytes in each plane at a pop; more bytes might cause
2400: ; flicker.
2401: ;
2402: ; Input:
2403: ; ulNextScan = width of scan, in bytes
2404: ; ulBlockHeight = # of scans to copy
2405: ; ulRightEdgeSrc = start source offset in bitmap
2406: ; ulRightEdgeDest = start dest offset in bitmap
2407: ; ulRightSrcWidthMinus1 = width of right source edge minus 1 (0 or 1)
2408: ; jRightMask = right edge clip mask
2409: ;
2410: ; Output:
2411: ; Advances ulRightEdgeSrc and ulRightEdgeDest to scan after last
2412: ; scan processed
2413: ;-----------------------------------------------------------------------;
2414:
2415: align 4
2416: copy_right_edge:
2417:
2418: ; Calculate start source and dest addresses from bitmap start addresses and
2419: ; offsets within bitmap.
2420:
2421: mov ecx,pdsurf
2422: mov esi,ulRightEdgeSrc
2423: add esi,[ecx].dsurf_pvBitmapStart2WindowS
2424: mov edi,ulRightEdgeDest
2425: add edi,[ecx].dsurf_pvBitmapStart2WindowD
2426:
2427: ; Copy the edge. The copier is passed AH = clip mask, ESI = source, EDI = dest.
2428:
2429: mov ah,byte ptr jRightMask ;clip mask for this edge
2430: mov ebx,ulRightSrcWidthMinus1
2431: call copy_edge_table[ebx*4] ;pick the copier by source edge width - 1
2432:
2433: ; Remember where we left off, for next time (converted back to bitmap offsets).
2434:
2435: mov ecx,pdsurf
2436: sub esi,[ecx].dsurf_pvBitmapStart2WindowS
2437: mov ulRightEdgeSrc,esi
2438: sub edi,[ecx].dsurf_pvBitmapStart2WindowD
2439: mov ulRightEdgeDest,edi
2440:
2441: PLAIN_RET
2442:
2443:
2444: ;-----------------------------------------------------------------------;
2445: ; Copies an edge from a 1-wide source to the destination on the screen.
2446: ; Entry:
2447: ; AH = bit mask setting for edge
2448: ; ESI = source address
2449: ; EDI = destination address
2450: ; ulBlockHeight = # of scans (one byte each) to copy per plane
2451: ; ulNextScan = scan width
2452: ; Source readable, and destination readable and writable
2453: ; Exit:
2454: ; ESI = next source address
2455: ; EDI = next destination address
2456: ;
2457: ; Preserved: EBP
2458: ;-----------------------------------------------------------------------;
2459:
2460: align 4
2461: copy_edge_1ws:
2462: mov pSrcAddr,esi
2463: mov pDestAddr,edi
2464:
2465: ; Set the clip mask for this edge.
2466:
2467: mov edx,VGA_BASE + GRAF_ADDR
2468: mov al,GRAF_BIT_MASK
2469: out dx,ax ;AL = register index, AH = clip mask from caller
2470:
2471: ; Leave the GC Index pointing to the Read Map.
2472:
2473: mov al,GRAF_READ_MAP
2474: out dx,al
2475:
2476: mov ecx,offset copy_edge_rw_1ws_full_chunk
2477: ;entry point into unrolled loop to copy first
2478: ; chunk, assuming it's a full chunk
2479: mov ebx,ulBlockHeight
2480:
2481: ; Copy the edge in a series of chunks of up to EDGE_CHUNK_SIZE scans each.
2482:
2483: copy_edge_chunk_loop_1ws:
2484:
2485: sub ebx,EDGE_CHUNK_SIZE ;scans remaining after this chunk, assuming
2486: ; a full chunk
2487: jge short @F ;do a full chunk
2488: add ebx,EDGE_CHUNK_SIZE ;not a full chunk; process all remaining
2489: ; scans
2490: mov ecx,pfnCopyEdgeRWEntry_1ws[-4][ebx*4]
2491: ;entry point into unrolled loop to copy desired
2492: ; chunk size (table's first entry is for 1 scan, hence the -4)
2493: sub ebx,ebx ;no scans after this
2494: @@:
2495: push ebx ;remember remaining scan count
2496:
2497: mov ah,MM_C3 ;start by copying plane 3 (clip mask in AH is no longer needed)
2498: mov ebx,ulNextScan
2499:
2500: copy_edge_plane_loop_1ws:
2501:
2502: ; Set Map Mask to enable writes to plane we're copying. Only the data port is written, so the Sequencer Index is assumed to already point to the Map Mask.
2503:
2504: mov al,ah
2505: mov dl,SEQ_DATA
2506: out dx,al
2507:
2508: ; Set Read Map to same plane.
2509:
2510: shr al,1 ;map plane into ReadMask
2511: cmp al,100b ;set Carry if not C3 (plane 3)
2512: adc al,-1 ;sub 1 only if C3
2513: mov dl,GRAF_DATA
2514: out dx,al
2515:
2516: mov esi,pSrcAddr ;every plane restarts at the chunk's first scan
2517: mov edi,pDestAddr
2518:
2519: jmp ecx ;copy this plane's strip of the edge
2520:
2521:
2522: ;-----------------------------------------------------------------------;
2523: ; Table of unrolled edge loop entry points. First entry point is to copy
2524: ; 1 byte, last entry point is to copy EDGE_CHUNK_SIZE bytes.
2525: ;-----------------------------------------------------------------------;
2526:
2527: pfnCopyEdgeRWEntry_1ws label dword
2528: INDEX = 1
2529: rept EDGE_CHUNK_SIZE
2530: DEFINE_DD EDGE_RW_1WS,%INDEX
2531: INDEX = INDEX+1
2532: endm
2533:
2534:
2535: ;-----------------------------------------------------------------------;
2536: ; Unrolled loop for copying a strip of edge bytes, with 1-wide source and
2537: ; destination both readable and writable.
2538: ;-----------------------------------------------------------------------;
2539:
2540: COPY_EDGE_RW_1WS macro ENTRY_LABEL,ENTRY_INDEX
2541: &ENTRY_LABEL&ENTRY_INDEX&:
2542: mov al,[esi] ;get byte to copy
2543: add esi,ebx ;point to next source scan
2544: xchg [edi],al ;read before write so Bit Mask can operate
2545: ; VGA rotates during write
2546: add edi,ebx ;point to next dest scan
2547: endm ;-----------------------------------;
2548:
2549: ; EBX = scan line width
2550: ; ESI = source address to copy from
2551: ; EDI = target address to copy to
2552: ; Bit Mask set to desired clipping
2553: ; Read Map and Map Mask set to enable the desired plane for read and write
2554: ; AH = current plane's Map Mask; ECX = entry point for this chunk (both preserved)
2555: align 4
2556: copy_edge_rw_1ws_full_chunk:
2557: UNROLL_LOOP COPY_EDGE_RW_1WS,EDGE_RW_1WS,EDGE_CHUNK_SIZE
2558:
2559: ; Do next plane within this chunk, if any.
2560:
2561: shr ah,1 ;advance to next plane
2562: jnz copy_edge_plane_loop_1ws
2563:
2564: ; Remember where we left off, for the next chunk.
2565:
2566: mov pSrcAddr,esi
2567: mov pDestAddr,edi
2568:
2569: ; Do next chunk within this bank block, if any.
2570:
2571: pop ebx ;retrieve remaining scan count
2572: and ebx,ebx ;any scans left?
2573: jnz copy_edge_chunk_loop_1ws ;more scans to do
2574:
2575: PLAIN_RET
2576:
2577:
2578: ;-----------------------------------------------------------------------;
2579: ; Copies a strip of left edge bytes from the source to the destination
2580: ; through an intermediate RAM buffer. This is the approach required by
2581: ; 1 R/W and 1R/1W adapters when the source and dest are in different banks.
2582: ; Writes up to EDGE_CHUNK_SIZE bytes in each plane at a pop; more bytes might
2583: ; cause flicker.
2584: ;
2585: ; Input:
2586: ; ulNextScan = width of scan, in bytes
2587: ; ulBlockHeight = # of scans to copy
2588: ; ulLeftEdgeSrc = start source offset in bitmap
2589: ; ulLeftEdgeDest = start dest offset in bitmap
2590: ; jLeftMask = left edge clip mask
2591: ; pTempPlane = pointer to temp storage buffer
2592: ; ulCurrentSrcScan = scan used to map in source bank
2593: ; ulCurrentDestScan = scan used to map in dest bank
2594: ; ulCurrentJustification = justification used to map in current bank
2595: ; ulLeftSrcWidthMinus1 = width of left source edge minus 1 (0 or 1)
2596: ; For 1 R/W adapters, expects the source bank to be mapped in; banking
2597: ; is the same at exit as it was at entry
2598: ;
2599: ; Output:
2600: ; Advances ulLeftEdgeSrc and ulLeftEdgeDest to scan after last
2601: ; scan processed
2602: ;
2603: ; Note that this should never be called for an unbanked or 2 R/W adapter,
2604: ; because the source and dest are always both addressable simultaneously then.
2605: ;-----------------------------------------------------------------------;
2606:
2607: align 4
2608: copy_left_edge_via_buffer:
2609:
2610: ; First, copy all the bytes into the temporary buffer.
2611:
2612: ; Calculate the start source address from the bitmap start address and the
2613: ; offset within the bitmap (the dest address is calculated later, below).
2614:
2615: mov ecx,pdsurf
2616: mov esi,ulLeftEdgeSrc
2617: add esi,[ecx].dsurf_pvBitmapStart2WindowS
2618:
2619: ; Copy the edge from the source to the temp buffer.
2620:
2621: mov eax,ulLeftSrcWidthMinus1
2622: call copy_edge_from_screen_to_buffer[eax*4] ;pick the copier by source edge width - 1
2623:
2624: ; Remember where we left off, for next time
2625:
2626: mov ebx,pdsurf
2627: sub esi,[ebx].dsurf_pvBitmapStart2WindowS
2628: mov ulLeftEdgeSrc,esi
2629:
2630: ; Now copy the temp buffer to the screen.
2631:
2632: ; Map in the source bank to match the destination, so we can read/write to it
2633: ; and let the Bit Mask work. Note that on a 1 R/W adapter, both banks will be
2634: ; mapped by this call, which is fine.
2635:
2636: ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
2637: <ebx,ulCurrentDestScan,ulCurrentJustification,MapSourceBank>
2638:
2639: ; Calculate dest start address (if this is a 1 R/W adapter, we had to wait
2640: ; until now to calculate this, because the dest bank wasn't mapped earlier).
2641:
2642: mov edi,ulLeftEdgeDest
2643: add edi,[ebx].dsurf_pvBitmapStart2WindowD
2644:
2645: ; Do the copy.
2646:
2647: mov ah,byte ptr jLeftMask ;clip mask for this edge
2648: mov ebx,ulLeftSrcWidthMinus1
2649: call copy_edge_from_buffer_to_screen[ebx*4] ;pick the copier by source edge width - 1
2650:
2651: ; Remember where we left off, for next time (converted back to a bitmap offset).
2652:
2653: mov ebx,pdsurf
2654: sub edi,[ebx].dsurf_pvBitmapStart2WindowD
2655: mov ulLeftEdgeDest,edi
2656:
2657: ; Put back the original source bank. Note that on a 1 R/W adapter, both banks
2658: ; will be mapped by this call, which is fine.
2659:
2660: ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
2661: <ebx,ulCurrentSrcScan,ulCurrentJustification,MapSourceBank>
2662:
2663: PLAIN_RET
2664:
2665:
2666: ;-----------------------------------------------------------------------;
2667: ; Copies a strip of right edge bytes from the source to the destination
2668: ; through an intermediate RAM buffer. This is the approach required by
2669: ; 1 R/W and 1R/1W adapters when the source and dest are in different banks.
2670: ; Writes up to EDGE_CHUNK_SIZE bytes in each plane at a time; more bytes
2671: ; might cause flicker.
2672: ;
2673: ; Input:
2674: ; ulNextScan = width of scan, in bytes
2675: ; ulBlockHeight = # of scans to copy
2676: ; ulRightEdgeSrc = start source offset in bitmap
2677: ; ulRightEdgeDest = start dest offset in bitmap
2678: ; jRightMask = right edge clip mask
2679: ; pTempPlane = pointer to temp storage buffer
2680: ; ulCurrentSrcScan = scan used to map in source bank
2681: ; ulCurrentDestScan = scan used to map in dest bank
2682: ; ulCurrentJustification = justification used to map in current bank
2683: ; ulRightSrcWidthMinus1 = width of right source edge minus 1 (0 or 1)
2684: ; For 1 R/W adapters, expects the source bank to be mapped in; banking
2685: ; is the same at exit as it was at entry
2686: ;
2687: ; Output:
2688: ; Advances ulRightEdgeSrc and ulRightEdgeDest to the scan after the last
2689: ; scan processed
2690: ;
2691: ; Note that this should never be called for an unbanked or 2 R/W adapter,
2692: ; because the source and dest are always both addressable simultaneously then.
2693: ;-----------------------------------------------------------------------;
2694:
2695: align 4
2696: copy_right_edge_via_buffer:
2697:
2698: ; First, read all the right edge bytes from the source into the temporary buffer.
2699:
2700: ; Calculate the start source address from the bitmap start address and the
2701: ; offset within the bitmap (the dest address is calculated later, below).
2702:
2703: mov ecx,pdsurf
2704: mov esi,ulRightEdgeSrc ;source offset within bitmap
2705: add esi,[ecx].dsurf_pvBitmapStart2WindowS ;convert offset to address
2706:
2707: ; Copy the edge from the source to the temp buffer.
2708:
2709: mov eax,ulRightSrcWidthMinus1 ;table index: source edge width minus 1
2710: call copy_edge_from_screen_to_buffer[eax*4] ;advances ESI past last scan read
2711:
2712: ; Remember where we left off in the source, for next time.
2713:
2714: mov ebx,pdsurf
2715: sub esi,[ebx].dsurf_pvBitmapStart2WindowS ;convert address back to offset
2716: mov ulRightEdgeSrc,esi
2717:
2718: ; Now copy the temp buffer to the screen.
2719:
2720: ; Map in the source bank to match the destination, so we can read/write to it
2721: ; and let the Bit Mask work. Note that on a 1 R/W adapter, both banks will be
2722: ; mapped by this call, which is fine.
2723:
2724: ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
2725: <ebx,ulCurrentDestScan,ulCurrentJustification,MapSourceBank>
2726:
2727: ; Calculate dest start address (if this is a 1 R/W adapter, we had to wait
2728: ; until now to calculate this, because the dest bank wasn't mapped earlier).
2729:
2730: mov edi,ulRightEdgeDest ;dest offset within bitmap
2731: add edi,[ebx].dsurf_pvBitmapStart2WindowD ;convert offset to address
2732:
2733: ; Do the copy from the temp buffer to the destination.
2734:
2735: mov ah,byte ptr jRightMask ;clip mask for this edge
2736: mov ebx,ulRightSrcWidthMinus1 ;table index: source edge width minus 1
2737: call copy_edge_from_buffer_to_screen[ebx*4]
2738:
2739: ; Remember where we left off in the destination, for next time.
2740:
2741: mov ebx,pdsurf
2742: sub edi,[ebx].dsurf_pvBitmapStart2WindowD ;convert address back to offset
2743: mov ulRightEdgeDest,edi
2744:
2745: ; Put back the original source bank, so banking at exit matches banking at
2746: ; entry. Note that on a 1 R/W adapter, both banks will be mapped by this call.
2747:
2748: ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
2749: <ebx,ulCurrentSrcScan,ulCurrentJustification,MapSourceBank>
2750:
2751: PLAIN_RET
2752:
2753:
2754: ;-----------------------------------------------------------------------;
2755: ; Copies an edge from the temp buffer (1 wide) to the screen.
2756: ; Entry:
2757: ; AH = bit mask setting for edge
2758: ; DX need not be preset; it is loaded in full by this routine
2759: ; EDI = destination address
2760: ; pTempPlane = temp buffer from which to copy
2761: ; ulBlockHeight = # of bytes to copy per plane
2762: ; ulNextScan = scan width
2763: ; Source and dest banks both pointing to destination
2764: ; Exit:
2765: ; EDI = next destination address
2766: ;
2767: ; Preserved: EBP
2768: ;-----------------------------------------------------------------------;
2769:
2770: align 4
2771: copy_buffered_edge_to_screen_1ws:
2772:
2773: mov pDestAddr,edi ;dest start for the first chunk
2774:
2775: mov edx,VGA_BASE + GRAF_ADDR ;load all of DX; DH is not assumed to have
2776: mov al,GRAF_BIT_MASK ; survived the caller's bank-control call
2777: out dx,ax ;set the Bit Mask (AL = index, AH = mask)
2778:
2779: mov pTempEntry,offset copy_edge_from_buf_full_chunk_1ws
2780: ;entry point into unrolled loop to copy first
2781: ; chunk, assuming it's a full chunk
2782: mov ecx,pTempPlane ;temp buffer start (copy from here)
2783: mov ebx,ulBlockHeight ;total # of scans to copy
2784:
2785: ; Copy the edge in a series of chunks, to avoid flicker.
2786:
2787: copy_from_buffer_chunk_loop_1ws:
2788:
2789: sub ebx,EDGE_CHUNK_SIZE ;scans remaining after this chunk, assuming
2790: ; a full chunk
2791: jge short @F ;do a full chunk
2792: add ebx,EDGE_CHUNK_SIZE ;not a full chunk; process all remaining
2793: ; scans
2794: mov ebx,pfnCopyEdgesFromBufferEntry_1ws[-4][ebx*4]
2795: mov pTempEntry,ebx ;entry point into unrolled loop to copy desired
2796: ; chunk size
2797: sub ebx,ebx ;no scans after this
2798: @@:
2799: push ebx ;remember remaining scan count
2800:
2801: mov al,MM_C3 ;start by copying plane 3
2802: mov ebx,ulNextScan ;EBX = scan width for the unrolled loop
2803:
2804: push ecx ;remember current temp buffer start
2805:
2806: mov dl,SEQ_DATA ;leave DX pointing to the Sequencer Data reg
2807:
2808: copy_from_buffer_plane_loop_1ws:
2809:
2810: ; Set Map Mask to enable writes to plane we're copying.
2811:
2812: out dx,al
2813:
2814: mov esi,ecx ;point to current plane's source byte
2815: add ecx,ulBlockHeight ;point to next plane's source byte
2816:
2817: mov edi,pDestAddr ;every plane starts at the chunk's dest start
2818:
2819: jmp pTempEntry ;copy this chunk of the edge for this plane
2820:
2821:
2822: ;-----------------------------------------------------------------------;
2823: ; Table of unrolled edge copy-from-buffer loop entry points. First entry
2824: ; point is to copy 1 byte, last entry point is to copy EDGE_CHUNK_SIZE
2825: ; bytes.
2826: ;-----------------------------------------------------------------------;
2827:
2828: pfnCopyEdgesFromBufferEntry_1ws label dword
2829: INDEX = 1
2830: rept EDGE_CHUNK_SIZE
2831: DEFINE_DD EDGE_FROM_BUFFER_1WS,%INDEX
2832: INDEX = INDEX+1
2833: endm
2834:
2835:
2836: ;-----------------------------------------------------------------------;
2837: ; Unrolled loop for copying a strip of edge bytes (1 wide) from the temp
2838: ; buffer.
2839: ;-----------------------------------------------------------------------;
2840:
2841: COPY_EDGE_FROM_BUFFER_1WS macro ENTRY_LABEL,ENTRY_INDEX
2842: &ENTRY_LABEL&ENTRY_INDEX&:
2843: mov ah,[esi] ;get byte to copy
2844: inc esi ;point to next source (temp buffer) byte
2845: xchg [edi],ah ;read before write so Bit Mask can operate
2846: ; VGA rotates during write
2847: add edi,ebx ;point to next dest (screen) scan
2848: endm ;-----------------------------------;
2849:
2850: ; EBX = scan line width
2851: ; ESI = source address to copy from (temp buffer)
2852: ; EDI = target address to copy to (screen)
2853: ; Bit Mask set to desired clipping
2854: ; Map Mask set to enable the desired plane for write
2855:
2856: align 4
2857: copy_edge_from_buf_full_chunk_1ws:
2858: UNROLL_LOOP COPY_EDGE_FROM_BUFFER_1WS, \
2859: EDGE_FROM_BUFFER_1WS,EDGE_CHUNK_SIZE
2860:
2861: ; Do next plane within this chunk, if any.
2862:
2863: shr al,1 ;advance to next plane
2864: jnz copy_from_buffer_plane_loop_1ws
2865:
2866: ; Remember where we left off, for next chunk.
2867:
2868: mov pDestAddr,edi
2869: pop ecx ;get back current temp buffer start
2870: add ecx,EDGE_CHUNK_SIZE ;point to next chunk's start
2871:
2872: ; Do next chunk within this bank block, if any.
2873:
2874: pop ebx ;retrieve remaining scan count
2875: and ebx,ebx ;any scans left?
2876: jnz copy_from_buffer_chunk_loop_1ws ;more scans to do
2877:
2878: PLAIN_RET
2879:
2880:
2881: ;-----------------------------------------------------------------------;
2882: ; Copies an edge from the screen (1 wide) to the temp buffer.
2883: ; Entry:
2884: ; ESI = source address
2885: ; pTempPlane = temp buffer to which to copy
2886: ; ulBlockHeight = # of bytes to copy per plane
2887: ; ulNextScan = scan width
2888: ; Source bank pointing to source
2889: ; Exit:
2890: ; DH = VGA_BASE SHR 8
2891: ; ESI = next source address
2892: ;
2893: ; Preserved: EBP
2894: ;-----------------------------------------------------------------------;
2895:
2896: align 4
2897: copy_screen_to_buffered_edge_1ws:
2898:
2899: mov pSrcAddr,esi ;every plane restarts from this source address
2900:
2901: ; Leave the GC Index pointing to the Read Map.
2902:
2903: mov edx,VGA_BASE + GRAF_ADDR
2904: mov al,GRAF_READ_MAP
2905: out dx,al
2906:
2907: mov ebx,ulBlockHeight ;# of scans to copy per plane
2908: SET_UP_UNROLL_VARS ebx,ecx,ebx,pfnCopyEdgeToTempEntry_1ws, \
2909: LOOP_UNROLL_SHIFT
2910: mov culTempCount,ebx ;remember # of unrolled loop iterations
2911: mov pTempEntry,ecx ;ditto for entry point
2912:
2913: mov ecx,ulNextScan ;ECX = scan width for the unrolled loop
2914: mov edi,pTempPlane ;dest offset in temp buffer for plane 3 bytes.
2915: ;The rest of the planes are stored
2916: ; consecutively
2917: mov al,3 ;start by copying plane 3
2918: mov dl,GRAF_DATA ;leave DX pointing to the GC Data reg
2919: copy_edge_to_buffer_plane_loop_1ws:
2920: mov esi,pSrcAddr ;source pointer
2921:
2922: out dx,al ;set Read Map to plane we're copying from.
2923:
2924: mov ebx,culTempCount ;# of unrolled loop iterations
2925: jmp pTempEntry ;copy the edge bytes for this plane to the
2926: ; temp buffer
2927:
2928: ;-----------------------------------------------------------------------;
2929: ; Table of unrolled edge copy to temp buffer loop entry points.
2930: ;-----------------------------------------------------------------------;
2931:
2932: UNROLL_LOOP_ENTRY_TABLE pfnCopyEdgeToTempEntry_1WS, \
2933: EDGE_TO_TEMP_1WS, LOOP_UNROLL_COUNT
2934:
2935: ;-----------------------------------------------------------------------;
2936: ; Unrolled loop for copying edge bytes (1 wide) to the temp buffer.
2937: ;-----------------------------------------------------------------------;
2938:
2939: COPY_EDGE_TO_TEMP_1WS macro ENTRY_LABEL,ENTRY_INDEX
2940: &ENTRY_LABEL&ENTRY_INDEX&:
2941: mov ah,[esi] ;get byte to copy
2942: add esi,ecx ;point to next source scan
2943: mov [edi],ah ;copy byte to temp buffer
2944: inc edi ;point to next temp buffer byte
2945: endm ;-----------------------------------;
2946:
2947: ; EBX = count of unrolled loop iterations
2948: ; ECX = scan line width (added to ESI to reach the next source scan)
2949: ; ESI = source address to copy from (screen)
2950: ; EDI = target address to copy to (temp buffer)
2951: ; Read Map set to enable the desired plane for read
2952:
2953: align 4
2954: edge_to_buffer_loop_1ws:
2955: UNROLL_LOOP COPY_EDGE_TO_TEMP_1WS,EDGE_TO_TEMP_1WS, \
2956: LOOP_UNROLL_COUNT
2957: dec ebx ;count down unrolled loop iterations
2958: jnz edge_to_buffer_loop_1ws
2959:
2960: dec al ;count down planes
2961: jns copy_edge_to_buffer_plane_loop_1ws ;loop until plane 0 has been copied
2962:
2963: PLAIN_RET
2964:
2965:
2966: ;-----------------------------------------------------------------------;
2967: ; Copies an edge from a 2-wide source to the destination on the screen.
2968: ; Entry:
2969: ; AH = bit mask setting for edge
2970: ; ESI = source address
2971: ; EDI = destination address
2972: ; ulBlockHeight = # of bytes to copy per plane
2973: ; ulNextScan = scan width
2974: ; ulCombineMask = masking to be applied before ORing the two source
2975: ; bytes together, to keep only the data needed in preparation
2976: ; for the VGA rotator doing its stuff
2977: ; Source readable, and destination readable and writable
2978: ; Exit:
2979: ; ESI = next source address
2980: ; EDI = next destination address
2981: ;
2982: ; Preserved: EBP
2983: ;-----------------------------------------------------------------------;
2984:
2985: align 4
2986: copy_edge_2ws:
2987: mov pSrcAddr,esi ;source start for the first chunk
2988: mov pDestAddr,edi ;dest start for the first chunk
2989:
2990: ; Set the clip mask for this edge.
2991:
2992: mov edx,VGA_BASE + GRAF_ADDR
2993: mov al,GRAF_BIT_MASK
2994: out dx,ax
2995:
2996: ; Leave the GC Index pointing to the Read Map.
2997:
2998: mov al,GRAF_READ_MAP
2999: out dx,al
3000:
3001: mov ebx,ulBlockHeight ;total # of scans to copy
3002:
3003: mov ecx,offset copy_edge_rw_2ws_full_chunk
3004: ;entry point into unrolled loop assuming we do
3005: ; a full chunk the first time
3006:
3007: ; Copy the edge in a series of chunks.
3008:
3009: copy_edge_chunk_loop_2ws:
3010:
3011: sub ebx,EDGE_CHUNK_SIZE ;scans remaining after this chunk, assuming
3012: ; a full chunk
3013: jge short @F ;do a full chunk
3014: add ebx,EDGE_CHUNK_SIZE ;not a full chunk; process all remaining
3015: ; scans
3016: mov ecx,pfnCopyEdgeRWEntry_2ws[-4][ebx*4]
3017: ;entry point into unrolled loop to copy desired
3018: ; chunk size
3019: sub ebx,ebx ;no scans after this
3020: @@:
3021: push ebx ;remember remaining scan count
3022:
3023: mov eax,(MM_C3 SHL 8) + 3 ;start by copying plane 3
3024: mov ebx,ulNextScan ;EBX = scan width for the unrolled loop
3025:
3026: copy_edge_plane_loop_2ws:
3027:
3028: push eax ;preserve plane info
3029:
3030: ; Set Read Map to enable reads from plane we're copying from.
3031:
3032: mov edx,VGA_BASE + GRAF_DATA
3033: out dx,al
3034:
3035: ; Set Map Mask to enable writes to plane we're copying.
3036:
3037: mov dl,SEQ_DATA
3038: mov al,ah
3039: out dx,al
3040:
3041: mov esi,pSrcAddr ;every plane starts at the chunk's source start
3042: mov edi,pDestAddr ;every plane starts at the chunk's dest start
3043: mov edx,ulCombineMask ;EDX = combine mask for the unrolled loop
3044:
3045: jmp ecx ;copy this chunk of the edge for this plane
3046:
3047:
3048: ;-----------------------------------------------------------------------;
3049: ; Table of unrolled edge loop entry points. First entry point is to copy
3050: ; 1 byte, last entry point is to copy EDGE_CHUNK_SIZE bytes.
3051: ;-----------------------------------------------------------------------;
3052:
3053: pfnCopyEdgeRWEntry_2ws label dword
3054: INDEX = 1
3055: rept EDGE_CHUNK_SIZE
3056: DEFINE_DD EDGE_RW_2WS,%INDEX
3057: INDEX = INDEX+1
3058: endm
3059:
3060:
3061: ;-----------------------------------------------------------------------;
3062: ; Unrolled loop for copying a strip of edge bytes, with 2-wide source and
3063: ; destination both readable and writable.
3064: ;-----------------------------------------------------------------------;
3065:
3066: COPY_EDGE_RW_2WS macro ENTRY_LABEL,ENTRY_INDEX
3067: &ENTRY_LABEL&ENTRY_INDEX&:
3068: mov ax,[esi] ;get word to copy
3069: add esi,ebx ;point to next source scan
3070: and eax,edx ;mask in preparation for combining bytes
3071: or al,ah ;combine the desired parts of the bytes
3072: xchg [edi],al ;read before write so Bit Mask can operate
3073: ; VGA rotates during write
3074: add edi,ebx ;point to next dest scan
3075: endm ;-----------------------------------;
3076:
3077: ; EBX = scan line width
3078: ; EDX = mask to preserve desired portions of AH and AL before combining
3079: ; ESI = source address to copy from
3080: ; EDI = target address to copy to
3081: ; Bit Mask set to desired clipping
3082: ; Read Map and Map Mask set to enable the desired plane for read and write
3083:
3084: align 4
3085: copy_edge_rw_2ws_full_chunk:
3086: UNROLL_LOOP COPY_EDGE_RW_2WS,EDGE_RW_2WS,EDGE_CHUNK_SIZE
3087:
3088: ; Do next plane within this chunk, if any.
3089:
3090: pop eax ;retrieve plane info
3091:
3092: shr ah,1 ;advance to next plane
3093: dec eax ;count down planes
3094: jns copy_edge_plane_loop_2ws ;loop until plane 0 has been copied
3095:
3096: ; Remember where we left off, for the next chunk.
3097:
3098: mov pSrcAddr,esi
3099: mov pDestAddr,edi
3100:
3101: ; Do next chunk within this bank block, if any.
3102:
3103: pop ebx ;retrieve remaining scan count
3104: and ebx,ebx ;any scans left?
3105: jnz copy_edge_chunk_loop_2ws ;more scans to do
3106:
3107: PLAIN_RET
3108:
3109:
3110: ;-----------------------------------------------------------------------;
3111: ; Copies an edge from the temp buffer (2 wide) to the screen.
3112: ; Entry:
3113: ; AH = bit mask setting for edge
3114: ; EDI = destination address
3115: ; pTempPlane = temp buffer from which to copy
3116: ; ulBlockHeight = # of words to copy per plane
3117: ; ulNextScan = scan width
3118: ; Source and dest banks both pointing to destination
3119: ; ulCombineMask = masking to be applied before ORing the two source
3120: ; bytes together, to keep only the data needed in preparation
3121: ; for the VGA rotator doing its stuff
3122: ; Exit:
3123: ; EDI = next destination address
3124: ;
3125: ; Preserved: EBP
3126: ;-----------------------------------------------------------------------;
3127:
3128: align 4
3129: copy_buffered_edge_to_screen_2ws:
3130:
3131: mov pDestAddr,edi ;dest start for the first chunk
3132:
3133: mov edx,VGA_BASE + GRAF_ADDR
3134: mov al,GRAF_BIT_MASK
3135: out dx,ax ;set the Bit Mask (AL = index, AH = mask)
3136:
3137: mov pTempEntry,offset copy_edge_from_buf_full_chunk_2ws
3138: ;entry point into unrolled loop, assuming the
3139: ; first chunk is full size
3140: mov ecx,pTempPlane ;temp buffer start (copy from here)
3141: mov ebx,ulBlockHeight ;total # of scans to copy
3142:
3143: ; Copy the edge in a series of chunks, to avoid flicker.
3144:
3145: copy_from_buffer_chunk_loop_2ws:
3146:
3147: sub ebx,EDGE_CHUNK_SIZE ;scans remaining after this chunk, assuming
3148: ; a full chunk
3149: jge short @F ;do a full chunk
3150: add ebx,EDGE_CHUNK_SIZE ;not a full chunk; process all remaining
3151: ; scans
3152: mov ebx,pfnCopyEdgesFromBufferEntry_2ws[-4][ebx*4]
3153: mov pTempEntry,ebx ;entry point into unrolled loop to copy final
3154: ; chunk size
3155: sub ebx,ebx ;no scans after this
3156: @@:
3157: push ebx ;remember remaining scan count
3158:
3159: mov al,MM_C3 ;start by copying plane 3
3160: mov ebx,ulNextScan ;EBX = scan width for the unrolled loop
3161:
3162: push ecx ;remember current temp buffer start
3163:
3164: copy_from_buffer_plane_loop_2ws:
3165:
3166: ; Set Map Mask to enable writes to plane we're copying.
3167:
3168: mov edx,VGA_BASE + SEQ_DATA
3169: out dx,al
3170:
3171: push eax ;preserve plane info
3172:
3173: mov esi,ecx ;point to current plane's source word
3174: mov eax,ulBlockHeight
3175: lea ecx,[ecx+eax*2] ;point to next plane's source word
3176:
3177: mov edi,pDestAddr ;every plane starts at the chunk's dest start
3178: mov edx,ulCombineMask ;EDX = combine mask for the unrolled loop
3179:
3180: jmp pTempEntry ;copy this chunk of the edge for this plane
3181:
3182:
3183: ;-----------------------------------------------------------------------;
3184: ; Table of unrolled edge copy-from-buffer loop entry points. First entry
3185: ; point is to copy 1 dest byte (1 scan), last entry point is to copy
3186: ; EDGE_CHUNK_SIZE dest bytes.
3187: ;-----------------------------------------------------------------------;
3188:
3189: pfnCopyEdgesFromBufferEntry_2WS label dword
3190: INDEX = 1
3191: rept EDGE_CHUNK_SIZE
3192: DEFINE_DD EDGE_FROM_BUFFER_2WS,%INDEX
3193: INDEX = INDEX+1
3194: endm
3195:
3196:
3197: ;-----------------------------------------------------------------------;
3198: ; Unrolled loop for copying a strip of edge words (2 wide) from the temp
3199: ; buffer, combining each word into a single dest byte.
3200: ;-----------------------------------------------------------------------;
3201:
3202: COPY_EDGE_FROM_BUFFER_2WS macro ENTRY_LABEL,ENTRY_INDEX
3203: &ENTRY_LABEL&ENTRY_INDEX&:
3204: mov ax,[esi] ;get word to copy
3205: add esi,2 ;point to next source (temp buffer) word
3206: and eax,edx ;mask in preparation for combining bytes
3207: or al,ah ;combine the desired parts of the bytes
3208: xchg [edi],al ;read before write so Bit Mask can operate
3209: ; VGA rotates during write
3210: add edi,ebx ;point to next dest (screen) scan
3211: endm ;-----------------------------------;
3212:
3213: ; EBX = scan line width
3214: ; EDX = mask to preserve desired portions of AH and AL before combining
3215: ; ESI = source address to copy from (temp buffer)
3216: ; EDI = target address to copy to (screen)
3217: ; Bit Mask set to desired clipping
3218: ; Map Mask set to enable the desired plane for write
3219:
3220: align 4
3221: copy_edge_from_buf_full_chunk_2ws:
3222: UNROLL_LOOP COPY_EDGE_FROM_BUFFER_2WS, \
3223: EDGE_FROM_BUFFER_2WS,EDGE_CHUNK_SIZE
3224:
3225: ; Do next plane within this chunk, if any.
3226:
3227: pop eax ;retrieve plane info
3228: shr al,1 ;advance to next plane
3229: jnz copy_from_buffer_plane_loop_2ws
3230:
3231: ; Remember where we left off, for next chunk.
3232:
3233: mov pDestAddr,edi
3234: pop ecx ;get back current temp buffer start
3235: add ecx,EDGE_CHUNK_SIZE*2 ;point to next chunk's start word
3236:
3237: ; Do next chunk within this bank block, if any.
3238:
3239: pop ebx ;retrieve remaining scan count
3240: and ebx,ebx ;any scans left?
3241: jnz copy_from_buffer_chunk_loop_2ws ;more scans to do
3242:
3243: PLAIN_RET
3244:
3245:
3246: ;-----------------------------------------------------------------------;
3247: ; Copies an edge from the screen (2 wide) to the temp buffer.
3248: ; Entry:
3249: ; ESI = source address
3250: ; pTempPlane = temp buffer to which to copy
3251: ; ulBlockHeight = # of words to copy per plane
3252: ; ulNextScan = scan width
3253: ; Source bank pointing to source
3254: ; Exit:
3255: ; ESI = next source address
3256: ;
3257: ; Preserved: EBP
3258: ;-----------------------------------------------------------------------;
3259:
3260: align 4
3261: copy_screen_to_buffered_edge_2ws:
3262:
3263: mov pSrcAddr,esi ;every plane restarts from this source address
3264:
3265: ; Leave the GC Index pointing to the Read Map.
3266:
3267: mov edx,VGA_BASE + GRAF_ADDR
3268: mov al,GRAF_READ_MAP
3269: out dx,al
3270:
3271: mov ebx,ulBlockHeight ;# of scans to copy per plane
3272: SET_UP_UNROLL_VARS ebx,ecx,ebx,pfnCopyEdgeToTempEntry_2ws, \
3273: LOOP_UNROLL_SHIFT
3274: mov culTempCount,ebx ;remember # of unrolled loop iterations
3275: mov pTempEntry,ecx ;ditto for entry point
3276:
3277: mov ecx,ulNextScan ;ECX = scan width for the unrolled loop
3278: mov edi,pTempPlane ;dest offset in temp buffer for plane 3 words.
3279: ;The rest of the planes are stored
3280: ; consecutively
3281: mov eax,3 ;start by copying plane 3
3282: copy_edge_to_buf_pl_loop_2ws:
3283: mov esi,pSrcAddr ;source pointer
3284:
3285: mov edx,VGA_BASE + GRAF_DATA ;reloaded per plane; the copy loop uses DX
3286: out dx,al ;set Read Map to plane from which we're copying
3287:
3288: mov ebx,culTempCount ;# of unrolled loop iterations
3289: jmp pTempEntry ;copy the edge words for this plane to the
3290: ; temp buffer
3291:
3292: ;-----------------------------------------------------------------------;
3293: ; Table of unrolled edge copy to temp buffer loop entry points.
3294: ;-----------------------------------------------------------------------;
3295:
3296: UNROLL_LOOP_ENTRY_TABLE pfnCopyEdgeToTempEntry_2WS, \
3297: EDGE_TO_TEMP_2WS, LOOP_UNROLL_COUNT
3298:
3299: ;-----------------------------------------------------------------------;
3300: ; Unrolled loop for copying edge words (2 wide) to the temp buffer.
3301: ;-----------------------------------------------------------------------;
3302:
3303: COPY_EDGE_TO_TEMP_2WS macro ENTRY_LABEL,ENTRY_INDEX
3304: &ENTRY_LABEL&ENTRY_INDEX&:
3305: mov dx,[esi] ;get word to copy
3306: add esi,ecx ;point to next source scan
3307: mov [edi],dx ;copy word to temp buffer
3308: add edi,2 ;point to next temp buffer word
3309: endm ;-----------------------------------;
3310:
3311: ; EBX = count of unrolled loop iterations
3312: ; ECX = scan line width (added to ESI to reach the next source scan)
3313: ; ESI = source address to copy from (screen)
3314: ; EDI = target address to copy to (temp buffer)
3315: ; Read Map set to enable the desired plane for read
3316:
3317: align 4
3318: edge_to_buffer_loop_2ws:
3319: UNROLL_LOOP COPY_EDGE_TO_TEMP_2WS,EDGE_TO_TEMP_2WS, \
3320: LOOP_UNROLL_COUNT
3321: dec ebx ;count down unrolled loop iterations
3322: jnz edge_to_buffer_loop_2ws
3323:
3324: dec eax ;count down planes
3325: jns copy_edge_to_buf_pl_loop_2ws ;loop until plane 0 has been copied
3326:
3327: PLAIN_RET
3328:
3329:
3330: ;-----------------------------------------------------------------------;
3331:
3332: endProc vNonAlignedSrcCopy
3333:
3334: _TEXT$04 ends
3335:
3336: end
3337:
3338:
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.