|
|
1.1 root 1: ;---------------------------Module-Header------------------------------;
2: ; Module Name: vgablts.asm
3: ;
4: ; Copyright (c) 1992 Microsoft Corporation
5: ;-----------------------------------------------------------------------;
6: ;-----------------------------------------------------------------------;
7: ; VOID vTrgBlt(PDEVSURF pdsurf, ULONG culRcl, RECTL * prcl, MIX ulMix,
8: ; ULONG ulClr);
9: ; Input:
10: ; pdsurf - surface to which to draw
11: ; culRcl - # of rectangles to fill
12: ; prcl - pointer to list of rectangles to fill
13: ; ulMix - mix rop with which to fill
14: ; ulClr - color with which to fill
15: ;
16: ; Performs accelerated solid area fills for all mixes.
17: ;
18: ;-----------------------------------------------------------------------;
19: ;
20: ; Note: Assumes all rectangles have positive heights and widths. Will not
21: ; work properly if this is not the case.
22: ;
23: ;-----------------------------------------------------------------------;
24: ;
25: ; Note: Cases where the width of the whole bytes fill is equal to the
26: ; width of the bitmap could be sped up by using a single REP MOVS or REP
27: ; STOS, but how often does WIN32 do a fill that's the width of the screen?
28: ; Not very.
29: ;
30: ;-----------------------------------------------------------------------;
31:
32: comment $
33:
34: The overall approach of this module is to accept a list of rectangles to
35: fill, set up the VGA hardware for the desired fill, and then fill the
36: rectangles one at a time. Each rectangle fill is set up for everything
37: but vertical parameters, and then decomposed into the sections that
38: intersect each VGA bank; each section is drawn in turn. The drawing code
39: is heavily unrolled for performance, and vectors are set up so that the
40: drawing code appropriate for the desired fill is essentially threaded
41: together.
42:
43: comment $
44:
45: ;-----------------------------------------------------------------------;
46:
47: ; Set LOOP_UNROLL_SHIFT to the log2 of the number of times you want loops in
48: ; this module unrolled. For example, LOOP_UNROLL_SHIFT of 3 yields 2**3 = 8
49: ; times unrolling. This is the only thing you need to change to control
50: ; unrolling.
51:
52: LOOP_UNROLL_SHIFT equ 2
53:
54: ;-----------------------------------------------------------------------;
55:
56: .386
57:
58: ifndef DOS_PLATFORM
59: .model small,c
60: else
61: ifdef STD_CALL
62: .model small,c
63: else
64: .model small,pascal
65: endif; STD_CALL
66: endif; DOS_PLATFORM
67:
68: assume cs:FLAT,ds:FLAT,es:FLAT,ss:FLAT
69: assume fs:nothing,gs:nothing
70:
71: .xlist
72: include stdcall.inc ;calling convention cmacros
73: include i386\egavga.inc
74: include i386\strucs.inc
75: include i386\unroll.inc
76: include i386\ropdefs.inc
77:
78: .list
79:
80: ;-----------------------------------------------------------------------;
81:
82: .data
83:
84: ;-----------------------------------------------------------------------;
85: ; Left edge clip masks for intrabyte start addresses 0 through 7 (the table
86: ; is indexed by xLeft AND 7). A start address of 0 is a whole byte, flagged as 0ffh.
87: public jLeftMask
88: jLeftMask label byte
89: db 0ffh,07fh,03fh,01fh,00fh,007h,003h,001h ;start addresses 0..7
90:
91: ;-----------------------------------------------------------------------;
92: ; Right edge clip masks for intrabyte end addresses (non-inclusive)
93: ; 0 through 7 (the table is indexed by xRight AND 7). An end address of 0 means the fill ends on a byte boundary, so the last byte is whole and is flagged as 0ffh.
94: public jRightMask
95: jRightMask label byte
96: db 0ffh,080h,0c0h,0e0h,0f0h,0f8h,0fch,0feh ;end addresses 0..7
97:
98: ;-----------------------------------------------------------------------;
99: ; Tables used to set up for the desired raster op. Note that entries for raster
100: ; ops that aren't handled here are generally correct, except that they ignore
101: ; need for inversion of the destination, which those rops require.
102:
103: ; Table of AND masks, indexed by mix, used to force off the drawing color for R2_BLACK (0). Entry 11 is also 0; presumably that is R2_NOP, which vTrgBlt rejects before any table lookup - TODO confirm against ropdefs.inc.
104: ; The first entry is ignored; there is no mix 0.
105: public jForceOffTable
106: jForceOffTable db 0
107: db 0,0ffh,0ffh,0ffh,0ffh,0ffh,0ffh,0ffh ;mixes 1-8
108: db 0ffh,0ffh, 0,0ffh,0ffh,0ffh,0ffh,0ffh ;mixes 9-16
109:
110: ;-----------------------------------------------------------------------;
111: ; Table of OR masks, indexed by mix, used to force on the drawing color for R2_NOT (Dn) and R2_WHITE (1). Only entries 6 and 16 are non-zero.
112: ; The first entry is ignored; there is no mix 0.
113: public jForceOnTable
114: jForceOnTable db 0, 0,0,0,0,0,0ffh,0,0,0,0,0,0,0,0,0,0ffh
115:
116: ;-----------------------------------------------------------------------;
117: ; Table of XOR masks, indexed by mix, used to invert the passed-in drawing color for Pn mixes. It is applied after the force-off and force-on masks.
118: ; The first entry is ignored; there is no mix 0.
119: public jNotTable
120: jNotTable db 0, 0,0ffh,0ffh,0ffh,0,0,0,0ffh,0,0ffh,0,0ffh,0,0,0,0
121:
122: ;-----------------------------------------------------------------------;
123: ; Table of VGA ALU logical functions corresponding to mixes. Note that Dn is
124: ; handled as a separate preceding inversion pass when part of a more complex
125: ; mix (see jInvertDest). DR_SET must be 0; vTrgBlt checks this with .errnz.
126: ; The first entry is ignored; there is no mix 0.
127: public jALUFuncTable
128: jALUFuncTable db 0
129: db DR_SET,DR_AND,DR_AND,DR_SET ;mixes 1-4
130: db DR_AND,DR_XOR,DR_XOR,DR_OR ;mixes 5-8
131: db DR_AND,DR_XOR, 0,DR_OR ;mixes 9-12; the literal 0 is entry 11, presumably R2_NOP (never looked up) - TODO confirm
132: db DR_SET,DR_OR ,DR_OR ,DR_SET ;mixes 13-16
133:
134: ;-----------------------------------------------------------------------;
135: ; Indexed by mix. Entries set to 1 mark rops that require two passes, one to invert the destination
136: ; and then another to finish the rop. vTrgBlt copies the entry into jInvertDestFirst.
137: ; The first entry is ignored; there is no mix 0.
138: public jInvertDest
139: jInvertDest db 0, 0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0
140:
141: ;-----------------------------------------------------------------------;
142: ; Table of routines to be called to draw edges, according to which edges are
143: ; partial and which edges are whole bytes. Index bit 0 = 1 if the left edge is partial; index bit 1 = 1 if the right edge is whole.
144: align 4
145: pfnEdgeDrawing label dword
146: dd do_right_edge_bytes ;0: left whole, right partial
147: dd do_both_edge_bytes ;1: left partial, right partial
148: dd check_next_bank ;2: both edges whole, no edges to draw
149: dd do_left_edge_bytes ;3: left partial, right whole
150:
151: ;-----------------------------------------------------------------------;
152: ; Table of pointers to tables used to find entry points in unrolled wide
153: ; whole byte code. Index = (# of odd leading bytes * 4) + # of odd trailing bytes, each 0-3.
154:
155: align 4
156: pfnWideWholeRep label dword
157: dd pfnDrawWideW00Entry ;0 leading, 0 trailing
158: dd pfnDrawWideW01Entry
159: dd pfnDrawWideW02Entry
160: dd pfnDrawWideW03Entry
161: dd pfnDrawWideW10Entry ;1 leading, 0 trailing
162: dd pfnDrawWideW11Entry
163: dd pfnDrawWideW12Entry
164: dd pfnDrawWideW13Entry
165: dd pfnDrawWideW20Entry ;2 leading, 0 trailing
166: dd pfnDrawWideW21Entry
167: dd pfnDrawWideW22Entry
168: dd pfnDrawWideW23Entry
169: dd pfnDrawWideW30Entry ;3 leading, 0 trailing
170: dd pfnDrawWideW31Entry
171: dd pfnDrawWideW32Entry
172: dd pfnDrawWideW33Entry
173:
174: ;-----------------------------------------------------------------------;
175: ; Table of pointers to tables used to find entry points in narrow, special-
176: ; cased unrolled non-replace whole byte code. Indexed by whole byte width.
177:
178: ; Note: The breakpoint where one should switch from special-casing to
179: ; REP MOVSB is purely a guess on my part. 5 seemed reasonable.
180:
181: align 4
182: pfnWholeBytesNonReplaceEntries label dword
183: dd 0 ;we never get a 0-wide case
184: dd pfnDraw1WideRWEntry
185: dd pfnDraw2WideRWEntry
186: dd pfnDraw3WideRWEntry
187: dd pfnDraw4WideRWEntry
188: MAX_NON_REPLACE_SPECIAL equ ($-pfnWholeBytesNonReplaceEntries)/4 ;# of entries; widths at or above this are not special-cased
189:
190: ;-----------------------------------------------------------------------;
191: ; Table of pointers to tables used to find entry points in narrow, special-
192: ; cased unrolled replace whole byte code. Indexed by whole byte width.
193:
194: ; Note: The breakpoint where one should switch from special-casing to
195: ; REP STOS is purely a guess on my part. 8 seemed reasonable.
196:
197: ; Start address MOD 4 is 0.
198: align 4
199: pfnWholeBytesMod0ReplaceEntries label dword
200: dd 0 ;we never get a 0-wide case
201: dd pfnDraw1WideWEntry
202: dd pfnDraw2WideWEntry
203: dd pfnDraw3WideWEvenEntry
204: dd pfnDraw4WideWEntry
205: dd pfnDraw5WideWEvenEntry
206: dd pfnDraw6WideWMod3_0Entry
207: dd pfnDraw7WideWMod3_0Entry
208: dd pfnDraw8WideWMod3_0Entry
209: MAX_REPLACE_SPECIAL equ ($-pfnWholeBytesMod0ReplaceEntries)/4 ;# of entries; widths at or above this are not special-cased
210:
211: ; Start address MOD 4 is 1. Indexed by whole byte width.
212: align 4
213: pfnWholeBytesMod1ReplaceEntries label dword
214: dd 0 ;we never get a 0-wide case
215: dd pfnDraw1WideWEntry
216: dd pfnDraw2WideWEntry
217: dd pfnDraw3WideWOddEntry
218: dd pfnDraw4WideWEntry
219: dd pfnDraw5WideWOddEntry
220: dd pfnDraw6WideWMod3_1Entry
221: dd pfnDraw7WideWMod3_1Entry
222: dd pfnDraw8WideWMod3_1Entry
223:
224: ; Start address MOD 4 is 2. Indexed by whole byte width.
225: align 4
226: pfnWholeBytesMod2ReplaceEntries label dword
227: dd 0 ;we never get a 0-wide case
228: dd pfnDraw1WideWEntry
229: dd pfnDraw2WideWEntry
230: dd pfnDraw3WideWEvenEntry
231: dd pfnDraw4WideWEntry
232: dd pfnDraw5WideWEvenEntry
233: dd pfnDraw6WideWMod3_2Entry
234: dd pfnDraw7WideWMod3_2Entry
235: dd pfnDraw8WideWMod3_2Entry
236:
237: ; Start address MOD 4 is 3. Indexed by whole byte width.
238: align 4
239: pfnWholeBytesMod3ReplaceEntries label dword
240: dd 0 ;we never get a 0-wide case
241: dd pfnDraw1WideWEntry
242: dd pfnDraw2WideWEntry
243: dd pfnDraw3WideWOddEntry
244: dd pfnDraw4WideWEntry
245: dd pfnDraw5WideWOddEntry
246: dd pfnDraw6WideWMod3_3Entry ;alignment-3 variant, like the 7- and 8-wide entries below
247: dd pfnDraw7WideWMod3_3Entry
248: dd pfnDraw8WideWMod3_3Entry
249:
250: ; Master MOD 4 alignment look-up table for entry tables for four possible
251: ; alignments for narrow, special-cased unrolled replace whole byte code. Indexed by (whole bytes start offset AND 3).
252: align 4
253: pfnWholeBytesReplaceMaster label dword
254: dd pfnWholeBytesMod0ReplaceEntries
255: dd pfnWholeBytesMod1ReplaceEntries
256: dd pfnWholeBytesMod2ReplaceEntries
257: dd pfnWholeBytesMod3ReplaceEntries
258:
259: ;-----------------------------------------------------------------------;
260:
261: .code
262:
263: _TEXT$01 SEGMENT DWORD USE32 PUBLIC 'CODE'
264: ASSUME CS:FLAT, DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING
265:
266: ;-----------------------------------------------------------------------;
267:
268: cProc vTrgBlt,20,< \
269: uses esi edi ebx, \
270: pdsurf: ptr DEVSURF, \
271: culRcl: dword, \
272: prcl: ptr RECTL, \
273: ulMix: dword, \
274: ulColor:dword >
275:
276: local ulRowOffset :dword ;Offset from start of scan line of
277: ; first byte to fill
278: local ulWholeBytes :dword ;# of whole bytes to fill
279: local ulWholeDwords :dword ;# of whole dwords to fill
280: local pfnWholeFn :dword ;pointer to routine used to draw
281: ; whole bytes
282: local ulScanWidth :dword ;offset from start of one scan to start
283: ; of next
284: local ulNextScan :dword ;offset from end of one scan line's
285: ; fill to start of next
286: local ulCurrentTopScan :dword ;top scan line to fill in current bank
287: local ulMasks :dword ;low byte = left mask, next byte
288: ; (ulMasks+1) = right mask
289: local ulBottomScan :dword ;bottom scan line of fill rectangle
290: local pfnDraw1WideVector :dword ;address at which to enter unrolled
291: ; edge loop
292: local jALUFunc :dword ;VGA ALU logical operation (SET, AND,
293: ; OR, or XOR)
294: local pfnStartDrawing :dword ;pointer to function to call to start
295: ; drawing
296: local pfnContinueDrawing :dword ;pointer to function to call to
297: ; continue drawing after doing whole
298: ; bytes
299: local ulLeftEdgeAdjust :dword ;used to bump the whole bytes start
300: ; address past the left edge when the
301: ; left edge is partial
302: local pfnWholeBytes :dword ;pointer to table of entry points
303: ; into unrolled loops for whole byte
304: ; filling
305: local jInvertDestFirst :dword ;1 if the rop requires a pass to invert
306: ; the destination before the normal
307: ; pass
308: local jDrawingColor :dword ;color with which we're drawing; kept
309: ; so it can be restored after inversion
310:
311: ;-----------------------------------------------------------------------;
312:
313: cld
314:
315: ;-----------------------------------------------------------------------;
316: ; Make sure there's something to draw; clip enumerations can be empty.
317: ;-----------------------------------------------------------------------;
318:
319: cmp culRcl,0 ;any rects to fill?
320: jz vTrgBlts_done ;no, we're done
321:
322:
323: ;-----------------------------------------------------------------------;
324: ; Set up variables that are constant for the entire time we're in this
325: ; module.
326: ;-----------------------------------------------------------------------;
327:
328: ;-----------------------------------------------------------------------;
329: ; Set up for the desired raster op.
330: ;-----------------------------------------------------------------------;
331:
332: sub ebx,ebx ;ignore any background mix; we're only
333: mov bl,byte ptr ulMix ; concerned with the foreground in this
334: ; module
335: cmp ebx,R2_NOP ;is this NOP?
336: jz vTrgBlts_done ;yes, we're done
337: mov al,jInvertDest[ebx] ;remember whether we need to
338: mov byte ptr jInvertDestFirst,al ; invert the destination before
339: ; finishing the rop
340: mov ah,byte ptr ulColor ;get the drawing color
341: and ah,jForceOffTable[ebx] ;force color to 0 if necessary
342: ; (R2_BLACK)
343: or ah,jForceOnTable[ebx] ;force color to 0ffh if necessary
344: ; (R2_WHITE, R2_NOT)
345: xor ah,jNotTable[ebx] ;invert color if necessary (any Pn mix)
346: ;at this point, AH has the color we
347: ; want to draw with; set up the VGA
348: ; hardware to draw with that color
349: mov byte ptr jDrawingColor,ah ;remember drawing color for restoring
350: ; after inversion
351: mov edx,VGA_BASE + GRAF_ADDR
352: mov al,GRAF_SET_RESET ;set/reset = color to write
353: out dx,ax
354: mov eax,0F00h + GRAF_ENAB_SR ;enable set/reset for all planes, so
355: out dx,ax ; set/reset color we just set becomes
356: ; the drawing color, regardless of the
357: ; value written by the CPU
358:
359: mov ah,jALUFuncTable[ebx] ;get the ALU logical function
360: and ah,ah ;is the logical function DR_SET?
361: .errnz DR_SET
362: jz short skip_ALU_set ;yes, don't have to set because that's
363: ; the VGA's default state
364: mov al,GRAF_DATA_ROT
365: out dx,ax ;set the ALU logical function
366: skip_ALU_set:
367: mov byte ptr jALUFunc,ah ;remember the ALU logical function
368:
369: mov eax,GRAF_MODE + ((M_AND_WRITE + M_COLOR_READ) SHL 8)
370: out dx,ax ;write mode 3 so we can do the masking
371: ; without OUTs, read mode 1 so we can
372: ; read 0xFF from memory always, for
373: ; ANDing (because Color Don't Care is
374: ; all zeros)
375:
376: ;-----------------------------------------------------------------------;
377: ; Fill the current rectangle with the specified raster op and color.
378: ;-----------------------------------------------------------------------;
379:
380: fill_rect_loop:
381:
382: ;-----------------------------------------------------------------------;
383: ; Set up variables that are constant from bank to bank during a single
384: ; fill.
385: ;-----------------------------------------------------------------------;
386:
387: ;-----------------------------------------------------------------------;
388: ; Set up masks and widths.
389: ;-----------------------------------------------------------------------;
390:
391: mov edi,prcl ;point to rectangle to fill
392: mov eax,[edi].yBottom
393: mov ulBottomScan,eax ;remember the bottom scan line of fill
394:
395: mov ebx,[edi].xRight ;right edge of fill (non-inclusive)
396: mov ecx,ebx
397: and ecx,0111b ;intrabyte address of right edge
398: mov ah,jRightMask[ecx] ;right edge mask
399:
400: mov esi,[edi].xLeft ;left edge of fill (inclusive)
401: mov ecx,esi
402: shr ecx,3 ;/8 for start offset from left edge
403: ; of scan line
404: mov ulRowOffset,ecx ;remember offset from start of scan
405: ; line
406: sub ebx,esi ;width in pixels of fill
407:
408: and esi,0111b ;intrabyte address of left edge
409: mov al,jLeftMask[esi] ;left edge mask
410:
411: dec ebx ;make inclusive on right
412: add ebx,esi ;inclusive width, starting counting at
413: ; the beginning of the left edge byte
414: shr ebx,3 ;width of fill in bytes touched - 1
415: jnz short more_than_1_byte ;more than 1 byte is involved
416:
417: ; Only one byte will be affected. Combine first/last masks.
418:
419: and al,ah ;we'll use first byte mask only
420: xor ah,ah ;want last byte mask to be 0
421: inc ebx ;so there's one count to subtract below
422: ; if this isn't a whole edge byte
423: more_than_1_byte:
424:
425: ; If all pixels in the left edge are altered, combine the first byte into the
426: ; whole byte count and clear the first byte mask, because we can handle solid
427: ; edge bytes faster as part of the whole bytes. Ditto for the right edge.
428:
429: sub ecx,ecx ;edge whole-status accumulator
430: cmp al,-1 ;is left edge a whole byte or partial?
431: adc ecx,ecx ;ECX=1 if left edge partial, 0 if whole
432: sub ebx,ecx ;if left edge partial, deduct it from
433: ; the whole bytes count
434: mov ulLeftEdgeAdjust,ecx ;for skipping over the left edge if
435: ; it's partial when pointing to the
436: ; whole bytes
437: and ah,ah ;is right edge mask 0, meaning this
438: ; fill is only 1 byte wide?
439: jz short save_masks ;yes, no need to do anything
440: cmp ah,-1 ;is right edge a whole byte or partial?
441: jnz short save_masks ;partial
442: add ecx,2 ;bit 1 of ECX=0 if right edge partial,
443: ; 1 if whole;
444: ;bit 0=1 if left edge partial, 0 if whole
445: inc ebx ;if right edge whole, include it in the
446: ; whole bytes count
447: save_masks:
448: mov ulMasks,eax ;save left (AL) and right (AH) clip masks
449: mov ulWholeBytes,ebx ;save # of whole bytes
450:
451: mov ecx,pfnEdgeDrawing[ecx*4] ;set address of routine to draw
452: mov pfnContinueDrawing,ecx ; all partial (non-whole) edges
453:
454: and ebx,ebx ;any whole bytes?
455: jz short start_vec_set ;no
456: ;yes, so draw the whole bytes before
457: ; the edge bytes
458:
459: ; The whole bytes loop depends on the type of operation being done. If the
460: ; operation is one which uses DR_SET, then we can use a STOS-type operation,
461: ; else we have to use a MOVSB-type operation (to load the latches with the
462: ; existing contents of display memory to allow the ALUs to work).
463:
464: cmp byte ptr jALUFunc,DR_SET ;is it a replace-type rop?
465: jz short is_replace_type ;yes
466: ;no, set up for non-replace whole bytes
467: mov ecx,offset whole_bytes_non_replace_wide
468: ;assume too wide to special-case
469: cmp ebx,MAX_NON_REPLACE_SPECIAL ;too wide to special case?
470: jnb short start_vec_set ;yes
471: mov ecx,pfnWholeBytesNonReplaceEntries[ebx*4] ;no, point to entry
472: mov pfnWholeBytes,ecx ; table for width
473: mov ecx,offset whole_bytes_special
474: ;set up to call special routine to fill
475: ; whole bytes
476: jmp short start_vec_set
477:
478: align 4
479: is_replace_type: ;set up for replace-type rop
480: cmp ebx,MAX_REPLACE_SPECIAL ;too wide to special case?
481: jnb short is_wide_replace ;yes
482: ;narrow enough to special case. Look up
483: ; the entry table for the special case
484: ; based on the start alignment
485: mov ecx,ulRowOffset
486: add ecx,ulLeftEdgeAdjust ;left edge whole bytes start offset
487: and ecx,011b ;left edge whole bytes start alignment
488: ; MOD 4
489: mov ecx,pfnWholeBytesReplaceMaster[ecx*4] ;look up table of entry
490: ; tables for alignment
491: mov ecx,[ecx+ebx*4] ;look up entry table for width
492: mov pfnWholeBytes,ecx ; table for width
493: mov ecx,offset whole_bytes_special
494: ;set up to call special routine to fill
495: ; whole bytes
496: jmp short start_vec_set
497:
498: align 4
499: is_wide_replace: ;set up for wide replace-type op
500: ;Note: assumes there is at least one
501: ; full dword involved!
502: mov ecx,ulRowOffset
503: add ecx,ulLeftEdgeAdjust ;left edge whole bytes start offset
504: neg ecx
505: and ecx,011b ;# of odd leading bytes before the first dword boundary
506: mov edx,ebx
507: sub edx,ecx ;ignore odd leading bytes
508: mov eax,edx
509: shr edx,2 ;# of whole dwords across (not counting
510: ; odd leading & trailing bytes)
511: mov ulWholeDwords,edx
512: and eax,011b ;# of odd (fractional) trailing bytes
513: shl ecx,2
514: or ecx,eax ;build a look-up index from the number
515: ; of leading and trailing bytes
516: mov ecx,pfnWideWholeRep[ecx*4] ;proper drawing handler for front/
517: mov pfnWholeBytes,ecx ; back alignment
518: mov ecx,offset whole_bytes_rep_wide
519: ;set up to call routine to perform wide
520: ; whole bytes fill
521: start_vec_set:
522: mov pfnStartDrawing,ecx ;first routine to run in each bank: the whole bytes handler, or the edge handler if there are no whole bytes
523:
524: mov ecx,pdsurf
525: mov eax,[ecx].dsurf_lNextScan
526: mov ulScanWidth,eax ;local copy of scan line width
527: sub eax,ebx ;EAX = delta to next scan
528: mov ulNextScan,eax
529:
530:
531: ;-----------------------------------------------------------------------;
532: ; Fill this rectangle.
533: ;-----------------------------------------------------------------------;
534:
535: cmp byte ptr jInvertDestFirst,1
536: ;is this an invert-dest-plus-something-
537: ; else rop that requires two passes?
538: jz short do_invert_dest_rop ;yes, special case with two passes
539:
540: do_single_pass: ;also entered from do_invert_dest_rop for the second pass
541: call draw_banks
542:
543:
544: ;-----------------------------------------------------------------------;
545: ; See if there are any more rectangles to fill.
546: ;-----------------------------------------------------------------------;
547:
548: add prcl,(size RECTL) ;point to the next rectangle, if there is one
549: dec culRcl ;count down the rectangles to fill
550: jnz fill_rect_loop
551:
552:
553: ;-----------------------------------------------------------------------;
554: ; We have filled all rectangles. Restore the VGA to its default state.
555: ;-----------------------------------------------------------------------;
556:
557: mov edx,VGA_BASE + GRAF_ADDR
558: mov eax,0000h + GRAF_ENAB_SR ;disable set/reset
559: out dx,ax
560: mov eax,GRAF_MODE + ((M_PROC_WRITE + M_DATA_READ) SHL 8)
561: out dx,ax ;restore read mode 0 and write mode 0
562: cmp byte ptr jALUFunc,DR_SET ;is the logical function already SET?
563: jz short vTrgBlts_done ;yes, no need to reset it
564: mov eax,(DR_SET shl 8) + GRAF_DATA_ROT ;set the logical function to
565: out dx,ax ; SET
566: vTrgBlts_done: ;also reached directly for an empty rect list or R2_NOP
567: cRet vTrgBlt
568:
569:
570: ;-----------------------------------------------------------------------;
571: ; Handles rops that require two passes, the first being a destination
572: ; inversion pass. Reached when jInvertDestFirst is 1; exits by jumping back to do_single_pass.
573: ;-----------------------------------------------------------------------;
574:
575: align 4
576: do_invert_dest_rop:
577:
578: ; Set up the VGA's hardware for inversion
579:
580: mov edx,VGA_BASE + GRAF_ADDR
581: mov eax,0ff00h + GRAF_SET_RESET ;set/reset = 0ffh to invert in
582: out dx,ax ; conjunction with XOR
583: mov eax,(DR_XOR shl 8) + GRAF_DATA_ROT
584: out dx,ax ;logical function = XOR to invert
585:
586: ; Invert the destination
587:
588: call draw_banks ;first pass over the current rectangle
589:
590: ; Restore the VGA's hardware to the state required for the second pass.
591:
592: mov edx,VGA_BASE + GRAF_ADDR
593: mov ah,byte ptr jDrawingColor ;color saved during rop setup
594: mov al,GRAF_SET_RESET ;set/reset = color to write
595: out dx,ax
596: mov ah,byte ptr jALUFunc ;logical function saved during rop setup
597: mov al,GRAF_DATA_ROT
598: out dx,ax ;set the ALU logical function
599:
600: ; Perform the second pass to finish the rop.
601:
602: jmp do_single_pass
603:
604:
605: ;-----------------------------------------------------------------------;
606: ; Fills all banks in the current fill rectangle. Called once per fill
607: ; rectangle, except for destination-inversion-plus-something-else rops, which call it twice. Entered by CALL; control leaves through pfnStartDrawing and the return is presumably made at banks_done (not in this part of the file).
608: ;-----------------------------------------------------------------------;
609:
610: align 4
611: draw_banks:
612:
613: ;-----------------------------------------------------------------------;
614: ; Map in the bank containing the top scan to fill, if it's not mapped in
615: ; already.
616: ;-----------------------------------------------------------------------;
617:
618: mov edi,prcl ;point to rectangle to fill
619: mov ecx,pdsurf ;point to surface
620: mov eax,[edi].yTop ;top scan line of fill
621: mov ulCurrentTopScan,eax ;this will be the fill top in 1st bank
622:
623: cmp eax,[ecx].dsurf_rcl1WindowClip.yTop ;is fill top less than
624: ; current bank?
625: jl short map_init_bank ;yes, map in proper bank
626: cmp eax,[ecx].dsurf_rcl1WindowClip.yBottom ;fill top greater than
627: ; current bank?
628: jl short init_bank_mapped ;no, proper bank already mapped
629: map_init_bank:
630:
631: ; Map in the bank containing the top scan line of the fill.
632:
633: ptrCall <dword ptr [ecx].dsurf_pfnBankControl>,<ecx,eax,JustifyTop>
634:
635: init_bank_mapped:
636:
637: ;-----------------------------------------------------------------------;
638: ; Main loop for processing fill in each bank.
639: ;-----------------------------------------------------------------------;
640:
641: ; Compute the starting address and scan line count for the initial bank.
642:
643: mov eax,pdsurf ;EAX->target surface
644: mov ebx,ulBottomScan ;bottom of destination rectangle
645: cmp ebx,[eax].dsurf_rcl1WindowClip.yBottom
646: ;which comes first, the bottom of the
647: ; dest rect or the bottom of the
648: ; current bank?
649: jl short BottomScanSet ;fill bottom comes first, so draw to
650: ; that; this is the last bank in fill
651: mov ebx,[eax].dsurf_rcl1WindowClip.yBottom
652: ;bank bottom comes first; draw to
653: ; bottom of bank
654: BottomScanSet:
655: mov edi,ulCurrentTopScan ;top scan line to fill in current bank
656: sub ebx,edi ;# of scans to fill in bank
657: imul edi,ulScanWidth ;offset of starting scan line
658:
659: ; Note that the start of the bitmap will change each time through the
660: ; bank loop, because the start of the bitmap is varied to map the
661: ; desired scan line to the banking window.
662:
663: add edi,[eax].dsurf_pvBitmapStart ;start of scan in bitmap
664: add edi,ulRowOffset ;EDI = start offset of fill in bitmap
665:
666: ; We have computed the starting address and scan count. Time to start drawing
667: ; in the initial bank.
668:
669: jmp pfnStartDrawing ;EBX = # of scans, EDI = start address of fill
670:
671:
672: ;-----------------------------------------------------------------------;
673: ; Whole byte fills.
674: ;-----------------------------------------------------------------------;
675:
676: ;-----------------------------------------------------------------------;
677: ; Handles replace whole byte fills wider than the maximum special
678: ; case width. On entry EBX = # of scans to fill, EDI = start address of fill.
679: ;
680: ; The destination is not involved, so a STOS (or equivalent) can be used
681: ; (no read needed before write).
682: ;-----------------------------------------------------------------------;
683:
684: align 4
685: whole_bytes_rep_wide:
686: push ebx ;save scan count
687: push edi ;save starting address
688:
689: mov eax,pfnWholeBytes ;point to entry table for unrolled
690: ; loop for whole byte width
691: SET_UP_UNROLL_VARS ebx,ecx, ebx,[eax], LOOP_UNROLL_SHIFT
692: add edi,ulLeftEdgeAdjust ;point to first whole byte to fill
693: mov esi,ulWholeDwords ;whole dwords width
694: mov eax,-1 ;this will become the Bit Mask,
695: ; enabling drawing to all bits
696: mov edx,ulNextScan ;offset from end of one scan line to
697: ; start of next
698: call ecx ;draw the wide whole bytes
699:
700: pop edi ;restore screen pointer
701: pop ebx ;restore fill scan count
702: jmp pfnContinueDrawing ;either keep drawing or we're done
703:
704:
705: ;-----------------------------------------------------------------------;
706: ; Handles both replace and non-replace whole byte fills narrow enough to
707: ; special case. On entry EBX = # of scans to fill, EDI = start address of fill.
708: ;-----------------------------------------------------------------------;
709:
710: align 4
711: whole_bytes_special:
712: push ebx ;save scan count
713: push edi ;save starting address
714:
715: mov eax,pfnWholeBytes ;point to entry table for unrolled
716: ; loop for whole byte width
717: SET_UP_UNROLL_VARS ebx,edx, ebx,[eax], LOOP_UNROLL_SHIFT
718: add edi,ulLeftEdgeAdjust ;point to first whole byte to fill
719: mov ecx,ulScanWidth ;offset to next scan line
720: mov eax,-1 ;this will become the Bit Mask,
721: ; enabling drawing to all bits
722: call edx ;draw the whole bytes; EDX was set by SET_UP_UNROLL_VARS
723:
724: pop edi ;restore screen pointer
725: pop ebx ;restore fill scan count
726: jmp pfnContinueDrawing ;either keep drawing or we're done
727:
728:
729: ;-----------------------------------------------------------------------;
730: ; Handles non-replace whole byte fills wider than the maximum special case
731: ; width. On entry EBX = # of scans to fill, EDI = start address of fill.
732: ;
733: ; The destination is involved, so a MOVSB (or equivalent) must be
734: ; performed in order to do a read before write to give the ALUs something
735: ; to work with.
736: ;-----------------------------------------------------------------------;
737:
738: align 4
739: whole_bytes_non_replace_wide:
740: push ebx ;save scan count
741: push edi ;save starting address
742:
743: SET_UP_UNROLL_VARS ebx,ecx, ebx,pfnDrawWideRWEntry, LOOP_UNROLL_SHIFT
744: add edi,ulLeftEdgeAdjust ;point to first whole byte to fill
745: mov eax,ulWholeBytes ;whole bytes width
746: mov edx,ulNextScan ;offset from end of one scan line to
747: ; start of next
748: call ecx ;draw the wide whole bytes; ECX was set by SET_UP_UNROLL_VARS
749:
750: pop edi ;restore screen pointer
751: pop ebx ;restore fill scan count
752: jmp pfnContinueDrawing ;either keep drawing or we're done
753:
754:
755: ;-----------------------------------------------------------------------;
756: ; Process any left/right columns that have to be done.
757: ;
758: ; Currently:
759: ; EBX = height to fill, in scans
760: ; EDI --> first byte of left edge
761: ;-----------------------------------------------------------------------;
762:
763: ;-----------------------------------------------------------------------;
764: ; Handle case where both edges are partial (non-whole) bytes. The left edge column is drawn first, then the right edge column through the same entry point.
765: ;-----------------------------------------------------------------------;
766: align 4
767: public do_both_edge_bytes
768: do_both_edge_bytes:
769:
770: ; Set up variables for entering unrolled loop.
771:
772: SET_UP_UNROLL_VARS ebx,edx, ebx,pfnDraw1WideRWEntry, LOOP_UNROLL_SHIFT
773: mov pfnDraw1WideVector,edx ;remember the entry point for the right edge pass
774:
775: mov ecx,ulScanWidth ;offset from one scan to next
776:
777: mov esi,ulWholeBytes ;ESI = # of whole bytes
778: lea esi,[esi+edi+1] ;--> start for right edge
779: mov al,byte ptr ulMasks ;this will become the Bit Mask for the
780: ; left edge
781: push ebx ;preserve scan line count
782: call edx ;jump into the unrolled loop to draw
783: pop ebx ;restore scan line count
784:
785: mov edi,esi ;point to first right edge byte
786: mov al,byte ptr ulMasks+1 ;this will become the Bit Mask for the
787: ; right edge
788: push offset edges_done ;return here
789: jmp pfnDraw1WideVector ;jump into the unrolled loop to draw
790:
791: ;-----------------------------------------------------------------------;
792: ; Handle case where only the left edge is partial (non-whole). On entry EBX = # of scans to fill, EDI --> first byte of left edge.
793: ;-----------------------------------------------------------------------;
794: align 4
795: do_left_edge_bytes:
796:
797: ; Set up variables for entering unrolled loop.
798:
799: SET_UP_UNROLL_VARS ebx,esi, ebx,pfnDraw1WideRWEntry, LOOP_UNROLL_SHIFT
800:
801: mov ecx,ulScanWidth ;offset from one scan to next
802: mov al,byte ptr ulMasks ;this will become the Bit Mask for the
803: ; left edge
804: push offset edges_done ;return here
805: jmp esi ;jump into the unrolled loop to draw
806:
807: ;-----------------------------------------------------------------------;
808: ; Handle case where only the right edge is partial (non-whole). On entry EBX = # of scans to fill, EDI --> first byte of the fill.
809: ;-----------------------------------------------------------------------;
810: align 4
811: do_right_edge_bytes:
812:
813: ; Set up variables for entering unrolled loop.
814:
815: SET_UP_UNROLL_VARS ebx,esi, ebx,pfnDraw1WideRWEntry, LOOP_UNROLL_SHIFT
816:
817: mov ecx,ulScanWidth ;offset from one scan to next
818: add edi,ulWholeBytes ;--> start for right edge (remember,
819: ; left edge is whole, so the left edge
820: ; byte is included in the whole byte
821: ; count)
822: mov al,byte ptr ulMasks+1 ;this will become the Bit Mask for the
823: ; right edge
824: call esi ;draw via the unrolled loop, then fall through to edges_done
825:
826: ;-----------------------------------------------------------------------;
827: ; We have done all partial edges.
828: ;-----------------------------------------------------------------------;
829:
830: edges_done:
831:
832: ;-----------------------------------------------------------------------;
833: ; See if there are any more banks to process.
834: ;-----------------------------------------------------------------------;
835:
836: check_next_bank:
837:
838: mov edi,pdsurf
839: mov eax,[edi].dsurf_rcl1WindowClip.yBottom ;is the fill bottom in
840: cmp ulBottomScan,eax ; the current bank?
841: jle short banks_done ;yes, so we're done
842: ;no, map in the next bank and fill it
843: mov ulCurrentTopScan,eax ;remember where the top of the bank
844: ; we're about to map in is (same as
845: ; bottom of bank we just did)
846:
847: ptrCall <dword ptr [edi].dsurf_pfnBankControl>,<edi,eax,JustifyTop>
848: ;map in the bank
849:
850: ; Compute the starting address and scan line count in this bank.
851:
852: mov eax,pdsurf ;EAX->target surface
853: mov ebx,ulBottomScan ;bottom of destination rectangle
854: cmp ebx,[eax].dsurf_rcl1WindowClip.yBottom
855: ;which comes first, the bottom of the
856: ; dest rect or the bottom of the
857: ; current bank?
858: jl short BottomScanSet2 ;fill bottom comes first, so draw to
859: ; that; this is the last bank in fill
860: mov ebx,[eax].dsurf_rcl1WindowClip.yBottom
861: ;bank bottom comes first; draw to
862: ; bottom of bank
863: BottomScanSet2:
864: mov edi,ulCurrentTopScan ;top scan line to fill in current bank
865: sub ebx,edi ;# of scans to fill in bank
866: imul edi,ulScanWidth ;offset of starting scan line
867:
868: ; Note that the start of the bitmap will change each time through the
869: ; bank loop, because the start of the bitmap is varied to map the
870: ; desired scan line to the banking window.
871:
872: add edi,[eax].dsurf_pvBitmapStart ;start of scan in bitmap
873: add edi,ulRowOffset ;EDI = start offset of fill in bitmap
874:
875: ; Draw in the new bank.
876:
877: jmp pfnStartDrawing
878:
879:
880: ;-----------------------------------------------------------------------;
881: ; Done with all banks in this fill.
882:
883: banks_done:
884: PLAIN_RET
885:
886: endProc vTrgBlt
887:
888:
889: ;-----------------------------------------------------------------------;
890: ; Unrolled loops.
891: ; There are two kinds of unrolled loops: read-before-write (to load the
892: ; latches), and write-only (for replace-type rops).
893: ;-----------------------------------------------------------------------;
894:
895:
896: ;-----------------------------------------------------------------------;
897: ; Unrolled drawing stuff for cases where read before write is required,
898: ; to load the latches.
899: ;-----------------------------------------------------------------------;
900:
901: ; Tables of entry points into the unrolled 1-, 2-, 3-, 4-wide, and 5-or-wider
902: ; read-before-write loops. Each table is paired with the label prefix (RW1, RW2, ...) that the matching UNROLL_LOOP invocation uses.
903:
904: UNROLL_LOOP_ENTRY_TABLE pfnDraw1WideRWEntry,RW1,LOOP_UNROLL_COUNT ;entries into draw_1_wide_rw_loop
905: UNROLL_LOOP_ENTRY_TABLE pfnDraw2WideRWEntry,RW2,LOOP_UNROLL_COUNT ;entries into draw_2_wide_rw_loop
906: UNROLL_LOOP_ENTRY_TABLE pfnDraw3WideRWEntry,RW3,LOOP_UNROLL_COUNT ;entries into draw_3_wide_rw_loop
907: UNROLL_LOOP_ENTRY_TABLE pfnDraw4WideRWEntry,RW4,LOOP_UNROLL_COUNT ;entries into draw_4_wide_rw_loop
908: UNROLL_LOOP_ENTRY_TABLE pfnDrawWideRWEntry,RWWIDE,LOOP_UNROLL_COUNT ;entries into draw_wide_rw_loop
909:
910: ;-----------------------------------------------------------------------;
911: ; Unrolled 1-, 2-, 3-, and 4-wide read before write drawing loops.
912: ;
913: ; Entry:
914: ; AL = pixel mask
915: ; EBX = unrolled loop count
916: ; ECX = scan line width in bytes
917: ; EDI = start offset
918: ;
919: ; EBX, EDI modified. All other registers preserved.
920:
921: ;-----------------------------------------------------------------------;
922: ; Macro to draw one read-before-write byte at [EDI], then advance EDI by ECX to the next scan line. Emits a label named ENTRY_LABEL with ENTRY_INDEX appended.
923:
924: DRAW_1_WIDE_RW macro ENTRY_LABEL,ENTRY_INDEX
925: &ENTRY_LABEL&ENTRY_INDEX&: ;entry point label for this copy of the body
926: and [edi],al ;we always read 0xFF, so AL is written
927: ; as-is; because we're in write mode 3,
928: ; AL becomes the Bit Mask
929: add edi,ecx ;point to the next scan line
930: endm ;-----------------------------------;
931:
932: ; 1-wide read/write loop. Entry: AL = pixel mask, EBX = unrolled loop count, ECX = scan line width in bytes, EDI = start offset. EBX and EDI are modified.
933:
934: align 4
935: draw_1_wide_rw_loop proc near
936: UNROLL_LOOP DRAW_1_WIDE_RW,RW1,LOOP_UNROLL_COUNT ;unrolled body with RW1 entry labels (UNROLL_LOOP is defined elsewhere)
937: dec ebx ;one pass through the unrolled body done
938: jnz draw_1_wide_rw_loop ;repeat from the top while passes remain
939:
940: ret ;EBX reached 0: finished
941:
942: draw_1_wide_rw_loop endp
943:
944: ;-----------------------------------------------------------------------;
945: ; Macro to draw two read-before-write bytes at [EDI] and [EDI+1], then advance EDI by ECX to the next scan line.
946:
947: DRAW_2_WIDE_RW macro ENTRY_LABEL,ENTRY_INDEX
948: &ENTRY_LABEL&ENTRY_INDEX&: ;entry point label for this copy of the body
949: and [edi],al ;byte 0: AND reads the byte, then writes AL back
950: and [edi+1],al ;byte 1: same, done bytewise so each byte is read before it is written
951: add edi,ecx ;point to the next scan line
952: endm ;-----------------------------------;
953:
954: ; 2-wide read/write loop. Entry: AL = pixel mask, EBX = unrolled loop count, ECX = scan line width in bytes, EDI = start offset. EBX and EDI are modified.
955:
956: align 4
957: draw_2_wide_rw_loop proc near
958: UNROLL_LOOP DRAW_2_WIDE_RW,RW2,LOOP_UNROLL_COUNT ;unrolled body with RW2 entry labels (UNROLL_LOOP is defined elsewhere)
959: dec ebx ;one pass through the unrolled body done
960: jnz draw_2_wide_rw_loop ;repeat from the top while passes remain
961:
962: ret ;EBX reached 0: finished
963:
964: draw_2_wide_rw_loop endp
965:
966: ;-----------------------------------------------------------------------;
967: ; Macro to draw three read-before-write bytes at [EDI]..[EDI+2], then advance EDI by ECX to the next scan line.
968:
969: DRAW_3_WIDE_RW macro ENTRY_LABEL,ENTRY_INDEX
970: &ENTRY_LABEL&ENTRY_INDEX&: ;entry point label for this copy of the body
971: and [edi],al ;byte 0: AND reads the byte, then writes AL back
972: and [edi+1],al ;byte 1
973: and [edi+2],al ;byte 2
974: add edi,ecx ;point to the next scan line
975: endm ;-----------------------------------;
976:
977: ; 3-wide read/write loop. Entry: AL = pixel mask, EBX = unrolled loop count, ECX = scan line width in bytes, EDI = start offset. EBX and EDI are modified.
978:
979: align 4
980: draw_3_wide_rw_loop proc near
981: UNROLL_LOOP DRAW_3_WIDE_RW,RW3,LOOP_UNROLL_COUNT ;unrolled body with RW3 entry labels (UNROLL_LOOP is defined elsewhere)
982: dec ebx ;one pass through the unrolled body done
983: jnz draw_3_wide_rw_loop ;repeat from the top while passes remain
984:
985: ret ;EBX reached 0: finished
986:
987: draw_3_wide_rw_loop endp
988:
989: ;-----------------------------------------------------------------------;
990: ; Macro to draw four read-before-write bytes at [EDI]..[EDI+3], then advance EDI by ECX to the next scan line.
991:
992: DRAW_4_WIDE_RW macro ENTRY_LABEL,ENTRY_INDEX
993: &ENTRY_LABEL&ENTRY_INDEX&: ;entry point label for this copy of the body
994: and [edi],al ;byte 0: AND reads the byte, then writes AL back
995: and [edi+1],al ;byte 1
996: and [edi+2],al ;byte 2
997: and [edi+3],al ;byte 3
998: add edi,ecx ;point to the next scan line
999: endm ;-----------------------------------;
1000:
1001: ; 4-wide read/write loop. Entry: AL = pixel mask, EBX = unrolled loop count, ECX = scan line width in bytes, EDI = start offset. EBX and EDI are modified.
1002:
1003: align 4
1004: draw_4_wide_rw_loop proc near
1005: UNROLL_LOOP DRAW_4_WIDE_RW,RW4,LOOP_UNROLL_COUNT ;unrolled body with RW4 entry labels (UNROLL_LOOP is defined elsewhere)
1006: dec ebx ;one pass through the unrolled body done
1007: jnz draw_4_wide_rw_loop ;repeat from the top while passes remain
1008:
1009: ret ;EBX reached 0: finished
1010:
1011: draw_4_wide_rw_loop endp
1012:
1013: ;-----------------------------------------------------------------------;
1014: ; Unrolled 5-or-wider read before write loop.
1015: ;
1016: ; Entry:
1017: ; EAX = # of bytes to fill across scan line (needed only by 5-or-wider
1018: ; handler)
1019: ; EBX = unrolled loop count
1020: ; EDX = offset from end of one scan line to the start of the next
1021: ; EDI = start offset
1022: ;
1023: ; EBX, ECX, ESI, EDI modified. All other registers preserved.
1024:
1025: ;-----------------------------------------------------------------------;
1026: ; Macro to draw EAX read-before-write bytes starting at [EDI], then advance EDI by
1027: ; EDX to the next scan line. (Intended for five or more bytes; will handle any number of bytes,
1028: ; including 0, but there are special-case handlers for narrow cases.)
1029: ; Works because reads of display memory return 0ffh, which then becomes the
1030: ; Bit Mask as it's written in write mode 3. ECX and ESI are overwritten.
1031:
1032: DRAW_WIDE_RW macro ENTRY_LABEL,ENTRY_INDEX
1033: &ENTRY_LABEL&ENTRY_INDEX&: ;entry point label for this copy of the body
1034: mov esi,edi ;source = destination, so each byte is copied onto itself
1035: mov ecx,eax ;REP count = # of bytes to fill across the scan line
1036: rep movsb ;read, then write, each byte in place; leaves EDI just past the fill
1037: add edi,edx ;skip from the end of this scan's fill to the start of the next
1038: endm ;-----------------------------------;
1039:
1040: ; 5-or-wider read/write loop. Entry: EAX = # of bytes per scan line, EBX = unrolled loop count, EDX = end-of-fill to next-start offset, EDI = start offset. EBX, ECX, ESI, EDI are modified.
1041:
1042: align 4
1043: draw_wide_rw_loop proc near
1044: UNROLL_LOOP DRAW_WIDE_RW,RWWIDE,LOOP_UNROLL_COUNT ;unrolled body with RWWIDE entry labels (UNROLL_LOOP is defined elsewhere)
1045: dec ebx ;one pass through the unrolled body done
1046: jnz draw_wide_rw_loop ;repeat from the top while passes remain
1047:
1048: ret ;EBX reached 0: finished
1049:
1050: draw_wide_rw_loop endp
1051:
1052:
1053: ;-----------------------------------------------------------------------;
1054: ; Unrolled drawing stuff (unrolled to reduce jumps to speed things up),
1055: ; for cases where read before write is NOT required.
1056: ;-----------------------------------------------------------------------;
1057:
1058: ; Tables of entry points into the unrolled 1- through 8-wide write-only loops.
1059: ; Note that there may be separate entry tables for various alignments of a
1060: ; specific width, in cases where performance can be improved by using different
1061: ; code for different alignments (EVEN/ODD start address, or the MOD3_x variants selected by start alignment).
1062:
1063: UNROLL_LOOP_ENTRY_TABLE pfnDraw1WideWEntry,W1,LOOP_UNROLL_COUNT ;1-wide
1064: UNROLL_LOOP_ENTRY_TABLE pfnDraw2WideWEntry,W2,LOOP_UNROLL_COUNT ;2-wide
1065: UNROLL_LOOP_ENTRY_TABLE pfnDraw3WideWEvenEntry,W3_EVEN,LOOP_UNROLL_COUNT ;3-wide, even start
1066: UNROLL_LOOP_ENTRY_TABLE pfnDraw3WideWOddEntry,W3_ODD,LOOP_UNROLL_COUNT ;3-wide, odd start
1067: UNROLL_LOOP_ENTRY_TABLE pfnDraw4WideWEntry,W4,LOOP_UNROLL_COUNT ;4-wide
1068: UNROLL_LOOP_ENTRY_TABLE pfnDraw5WideWEvenEntry,W5_EVEN,LOOP_UNROLL_COUNT ;5-wide, even start
1069: UNROLL_LOOP_ENTRY_TABLE pfnDraw5WideWOddEntry,W5_ODD,LOOP_UNROLL_COUNT ;5-wide, odd start
1070: UNROLL_LOOP_ENTRY_TABLE pfnDraw6WideWMod3_0Entry,W6_MOD3_0,LOOP_UNROLL_COUNT ;6-wide variants
1071: UNROLL_LOOP_ENTRY_TABLE pfnDraw6WideWMod3_1Entry,W6_MOD3_1,LOOP_UNROLL_COUNT
1072: UNROLL_LOOP_ENTRY_TABLE pfnDraw6WideWMod3_2Entry,W6_MOD3_2,LOOP_UNROLL_COUNT
1073: UNROLL_LOOP_ENTRY_TABLE pfnDraw7WideWMod3_0Entry,W7_MOD3_0,LOOP_UNROLL_COUNT ;7-wide variants
1074: UNROLL_LOOP_ENTRY_TABLE pfnDraw7WideWMod3_1Entry,W7_MOD3_1,LOOP_UNROLL_COUNT
1075: UNROLL_LOOP_ENTRY_TABLE pfnDraw7WideWMod3_2Entry,W7_MOD3_2,LOOP_UNROLL_COUNT
1076: UNROLL_LOOP_ENTRY_TABLE pfnDraw7WideWMod3_3Entry,W7_MOD3_3,LOOP_UNROLL_COUNT
1077: UNROLL_LOOP_ENTRY_TABLE pfnDraw8WideWMod3_0Entry,W8_MOD3_0,LOOP_UNROLL_COUNT ;8-wide variants
1078: UNROLL_LOOP_ENTRY_TABLE pfnDraw8WideWMod3_1Entry,W8_MOD3_1,LOOP_UNROLL_COUNT
1079: UNROLL_LOOP_ENTRY_TABLE pfnDraw8WideWMod3_2Entry,W8_MOD3_2,LOOP_UNROLL_COUNT
1080: UNROLL_LOOP_ENTRY_TABLE pfnDraw8WideWMod3_3Entry,W8_MOD3_3,LOOP_UNROLL_COUNT
1081:
1082:
1083: ;-----------------------------------------------------------------------;
1084: ; Unrolled 1- through 8-wide write-only drawing loops.
1085: ;
1086: ; Entry:
1087: ; AL/AX/EAX = pixel mask (if AX or EAX, then 0xFFFF or 0xFFFFFFFF)
1088: ; EBX = unrolled loop count
1089: ; ECX = scan line width in bytes
1090: ; EDI = start offset
1091: ;
1092: ; EBX, EDI modified. All other registers preserved.
1093:
1094: ;-----------------------------------------------------------------------;
1095: ; Macro to draw one write-only byte at [EDI], then advance EDI by ECX to the next scan line.
1096:
1097: DRAW_1_WIDE_W macro ENTRY_LABEL,ENTRY_INDEX
1098: &ENTRY_LABEL&ENTRY_INDEX&: ;entry point label for this copy of the body
1099: mov [edi],al ;plain store, no read first: AL is written
1100: ; as-is; because we're in write mode 3,
1101: ; AL becomes the Bit Mask
1102: add edi,ecx ;point to the next scan line
1103: endm ;-----------------------------------;
1104:
1105: ; 1-wide write-only loop. Entry: AL = pixel mask, EBX = unrolled loop count, ECX = scan line width in bytes, EDI = start offset. EBX and EDI are modified.
1106:
1107: align 4
1108: draw_1_wide_w_loop proc near
1109: UNROLL_LOOP DRAW_1_WIDE_W,W1,LOOP_UNROLL_COUNT ;unrolled body with W1 entry labels (UNROLL_LOOP is defined elsewhere)
1110: dec ebx ;one pass through the unrolled body done
1111: jnz draw_1_wide_w_loop ;repeat from the top while passes remain
1112:
1113: ret ;EBX reached 0: finished
1114:
1115: draw_1_wide_w_loop endp
1116:
1117: ;-----------------------------------------------------------------------;
1118: ; Macro to draw two write-only bytes at [EDI] with a single word store, then advance EDI by ECX to the next scan line.
1119:
1120: DRAW_2_WIDE_W macro ENTRY_LABEL,ENTRY_INDEX
1121: &ENTRY_LABEL&ENTRY_INDEX&: ;entry point label for this copy of the body
1122: mov [edi],ax ;bytes 0-1 in one word store
1123: add edi,ecx ;point to the next scan line
1124: endm ;-----------------------------------;
1125:
1126: ; 2-wide write-only loop. Entry: AX = pixel mask, EBX = unrolled loop count, ECX = scan line width in bytes, EDI = start offset. EBX and EDI are modified.
1127:
1128: align 4
1129: draw_2_wide_w_loop proc near
1130: UNROLL_LOOP DRAW_2_WIDE_W,W2,LOOP_UNROLL_COUNT ;unrolled body with W2 entry labels (UNROLL_LOOP is defined elsewhere)
1131: dec ebx ;one pass through the unrolled body done
1132: jnz draw_2_wide_w_loop ;repeat from the top while passes remain
1133:
1134: ret ;EBX reached 0: finished
1135:
1136: draw_2_wide_w_loop endp
1137:
1138: ;-----------------------------------------------------------------------;
1139: ; Macro to draw three write-only bytes at [EDI], then advance EDI by ECX to the next scan line.
1140: ; Optimized for even start address: the word store comes first so it lands on the even address.
1141:
1142: DRAW_3_WIDE_W_EVEN macro ENTRY_LABEL,ENTRY_INDEX
1143: &ENTRY_LABEL&ENTRY_INDEX&: ;entry point label for this copy of the body
1144: mov [edi],ax ;bytes 0-1 (word)
1145: mov [edi+2],al ;byte 2
1146: add edi,ecx ;point to the next scan line
1147: endm ;-----------------------------------;
1148:
1149: ; 3-wide write-only, starting at an even address. Entry: AX = pixel mask, EBX = unrolled loop count, ECX = scan line width in bytes, EDI = start offset.
1150:
1151: align 4
1152: draw_3_wide_w_even_loop proc near
1153: UNROLL_LOOP DRAW_3_WIDE_W_EVEN,W3_EVEN,LOOP_UNROLL_COUNT ;unrolled body with W3_EVEN entry labels (UNROLL_LOOP is defined elsewhere)
1154: dec ebx ;one pass through the unrolled body done
1155: jnz draw_3_wide_w_even_loop ;repeat from the top while passes remain
1156:
1157: ret ;EBX reached 0: finished
1158:
1159: draw_3_wide_w_even_loop endp
1160:
1161: ;-----------------------------------------------------------------------;
1162: ; Macro to draw three write-only bytes at [EDI], then advance EDI by ECX to the next scan line.
1163: ; Optimized for odd start address: a leading byte store puts the word store on the following even address.
1164:
1165: DRAW_3_WIDE_W_ODD macro ENTRY_LABEL,ENTRY_INDEX
1166: &ENTRY_LABEL&ENTRY_INDEX&: ;entry point label for this copy of the body
1167: mov [edi],al ;byte 0
1168: mov [edi+1],ax ;bytes 1-2 (word)
1169: add edi,ecx ;point to the next scan line
1170: endm ;-----------------------------------;
1171:
1172: ; 3-wide write-only, starting at an odd address. Entry: AX = pixel mask, EBX = unrolled loop count, ECX = scan line width in bytes, EDI = start offset.
1173:
1174: align 4
1175: draw_3_wide_w_odd_loop proc near
1176: UNROLL_LOOP DRAW_3_WIDE_W_ODD,W3_ODD,LOOP_UNROLL_COUNT ;unrolled body with W3_ODD entry labels (UNROLL_LOOP is defined elsewhere)
1177: dec ebx ;one pass through the unrolled body done
1178: jnz draw_3_wide_w_odd_loop ;repeat from the top while passes remain
1179:
1180: ret ;EBX reached 0: finished
1181:
1182: draw_3_wide_w_odd_loop endp
1183:
1184:
1185: ;-----------------------------------------------------------------------;
1186: ; Macro to draw four write-only bytes at [EDI] with a single dword store, then advance EDI by ECX to the next scan line.
1187:
1188: DRAW_4_WIDE_W macro ENTRY_LABEL,ENTRY_INDEX
1189: &ENTRY_LABEL&ENTRY_INDEX&: ;entry point label for this copy of the body
1190: mov [edi],eax ;bytes 0-3 in one dword store
1191: add edi,ecx ;point to the next scan line
1192: endm ;-----------------------------------;
1193:
1194: ; 4-wide write-only loop. Entry: EAX = pixel mask, EBX = unrolled loop count, ECX = scan line width in bytes, EDI = start offset. EBX and EDI are modified.
1195:
1196: align 4
1197: draw_4_wide_w_loop proc near
1198: UNROLL_LOOP DRAW_4_WIDE_W,W4,LOOP_UNROLL_COUNT ;unrolled body with W4 entry labels (UNROLL_LOOP is defined elsewhere)
1199: dec ebx ;one pass through the unrolled body done
1200: jnz draw_4_wide_w_loop ;repeat from the top while passes remain
1201:
1202: ret ;EBX reached 0: finished
1203:
1204: draw_4_wide_w_loop endp
1205:
1206:
1207: ;-----------------------------------------------------------------------;
1208: ; Macro to draw five write-only bytes at [EDI], then advance EDI by ECX to the next scan line.
1209: ; Optimized for even start address: dword store first, single byte last.
1210:
1211: DRAW_5_WIDE_W_EVEN macro ENTRY_LABEL,ENTRY_INDEX
1212: &ENTRY_LABEL&ENTRY_INDEX&: ;entry point label for this copy of the body
1213: mov [edi],eax ;bytes 0-3 (dword)
1214: mov [edi+4],al ;byte 4
1215: add edi,ecx ;point to the next scan line
1216: endm ;-----------------------------------;
1217:
1218: ; 5-wide write-only, starting at an even address. Entry: EAX = pixel mask, EBX = unrolled loop count, ECX = scan line width in bytes, EDI = start offset.
1219:
1220: align 4
1221: draw_5_wide_w_even_loop proc near
1222: UNROLL_LOOP DRAW_5_WIDE_W_EVEN,W5_EVEN,LOOP_UNROLL_COUNT ;unrolled body with W5_EVEN entry labels (UNROLL_LOOP is defined elsewhere)
1223: dec ebx ;one pass through the unrolled body done
1224: jnz draw_5_wide_w_even_loop ;repeat from the top while passes remain
1225:
1226: ret ;EBX reached 0: finished
1227:
1228: draw_5_wide_w_even_loop endp
1229:
1230:
1231: ;-----------------------------------------------------------------------;
1232: ; Macro to draw five write-only bytes at [EDI], then advance EDI by ECX to the next scan line.
1233: ; Optimized for odd start address: a leading byte store puts the dword store on the following even address.
1234:
1235: DRAW_5_WIDE_W_ODD macro ENTRY_LABEL,ENTRY_INDEX
1236: &ENTRY_LABEL&ENTRY_INDEX&: ;entry point label for this copy of the body
1237: mov [edi],al ;byte 0
1238: mov [edi+1],eax ;bytes 1-4 (dword)
1239: add edi,ecx ;point to the next scan line
1240: endm ;-----------------------------------;
1241:
1242: ; 5-wide write-only, starting at an odd address. Entry: EAX = pixel mask, EBX = unrolled loop count, ECX = scan line width in bytes, EDI = start offset.
1243:
1244: align 4
1245: draw_5_wide_w_odd_loop proc near
1246: UNROLL_LOOP DRAW_5_WIDE_W_ODD,W5_ODD,LOOP_UNROLL_COUNT ;unrolled body with W5_ODD entry labels (UNROLL_LOOP is defined elsewhere)
1247: dec ebx ;one pass through the unrolled body done
1248: jnz draw_5_wide_w_odd_loop ;repeat from the top while passes remain
1249:
1250: ret ;EBX reached 0: finished
1251:
1252: draw_5_wide_w_odd_loop endp
1253:
1254:
1255: ;-----------------------------------------------------------------------;
1256: ; Macro to draw six write-only bytes at [EDI], then advance EDI by ECX to the next scan line.
1257: ; Optimized for start address MOD 4 == 0 (the "MOD3" in the names means address AND 3).
1258:
1259: DRAW_6_WIDE_W_MOD3_0 macro ENTRY_LABEL,ENTRY_INDEX
1260: &ENTRY_LABEL&ENTRY_INDEX&: ;entry point label for this copy of the body
1261: mov [edi],eax ;bytes 0-3 (dword)
1262: mov [edi+4],ax ;bytes 4-5 (word)
1263: add edi,ecx ;point to the next scan line
1264: endm ;-----------------------------------;
1265:
1266: ; 6-wide write-only, starting at MOD 4 == 0. Entry: EAX = pixel mask, EBX = unrolled loop count, ECX = scan line width in bytes, EDI = start offset.
1267:
1268: align 4
1269: draw_6_wide_w_mod3_0_loop proc near
1270: UNROLL_LOOP DRAW_6_WIDE_W_MOD3_0,W6_MOD3_0,LOOP_UNROLL_COUNT ;unrolled body with W6_MOD3_0 entry labels (UNROLL_LOOP is defined elsewhere)
1271: dec ebx ;one pass through the unrolled body done
1272: jnz draw_6_wide_w_mod3_0_loop ;repeat from the top while passes remain
1273:
1274: ret ;EBX reached 0: finished
1275:
1276: draw_6_wide_w_mod3_0_loop endp
1277:
1278:
1279: ;-----------------------------------------------------------------------;
1280: ; Macro to draw six write-only bytes at [EDI], then advance EDI by ECX to the next scan line.
1281: ; Optimized for start address MOD 4 == 1 or 3 (the "MOD3" in the names means address AND 3). The dword store at +1 is dword-aligned only in the == 3 case.
1282:
1283: DRAW_6_WIDE_W_MOD3_1 macro ENTRY_LABEL,ENTRY_INDEX
1284: &ENTRY_LABEL&ENTRY_INDEX&: ;entry point label for this copy of the body
1285: mov [edi],al ;byte 0
1286: mov [edi+1],eax ;bytes 1-4 (dword)
1287: mov [edi+5],al ;byte 5
1288: add edi,ecx ;point to the next scan line
1289: endm ;-----------------------------------;
1290:
1291: ; 6-wide write-only, starting at MOD 4 == 1 or 3. Entry: EAX = pixel mask, EBX = unrolled loop count, ECX = scan line width in bytes, EDI = start offset.
1292:
1293: align 4
1294: draw_6_wide_w_mod3_1_loop proc near
1295: UNROLL_LOOP DRAW_6_WIDE_W_MOD3_1,W6_MOD3_1,LOOP_UNROLL_COUNT ;unrolled body with W6_MOD3_1 entry labels (UNROLL_LOOP is defined elsewhere)
1296: dec ebx ;one pass through the unrolled body done
1297: jnz draw_6_wide_w_mod3_1_loop ;repeat from the top while passes remain
1298:
1299: ret ;EBX reached 0: finished
1300:
1301: draw_6_wide_w_mod3_1_loop endp
1302:
1303:
1304: ;-----------------------------------------------------------------------;
1305: ; Macro to draw six write-only bytes at [EDI], then advance EDI by ECX to the next scan line.
1306: ; Optimized for start address MOD 4 == 2 (the "MOD3" in the names means address AND 3).
1307:
1308: DRAW_6_WIDE_W_MOD3_2 macro ENTRY_LABEL,ENTRY_INDEX
1309: &ENTRY_LABEL&ENTRY_INDEX&: ;entry point label for this copy of the body
1310: mov [edi],ax ;bytes 0-1 (word)
1311: mov [edi+2],eax ;bytes 2-5 (dword, on the next dword boundary)
1312: add edi,ecx ;point to the next scan line
1313: endm ;-----------------------------------;
1314:
1315: ; 6-wide write-only, starting at MOD 4 == 2. Entry: EAX = pixel mask, EBX = unrolled loop count, ECX = scan line width in bytes, EDI = start offset.
1316:
1317: align 4
1318: draw_6_wide_w_mod3_2_loop proc near
1319: UNROLL_LOOP DRAW_6_WIDE_W_MOD3_2,W6_MOD3_2,LOOP_UNROLL_COUNT ;unrolled body with W6_MOD3_2 entry labels (UNROLL_LOOP is defined elsewhere)
1320: dec ebx ;one pass through the unrolled body done
1321: jnz draw_6_wide_w_mod3_2_loop ;repeat from the top while passes remain
1322:
1323: ret ;EBX reached 0: finished
1324:
1325: draw_6_wide_w_mod3_2_loop endp
1326:
1327:
1328: ;-----------------------------------------------------------------------;
1329: ; Macro to draw seven write-only bytes at [EDI], then advance EDI by ECX to the next scan line.
1330: ; Optimized for start address MOD 4 == 0 (the "MOD3" in the names means address AND 3).
1331:
1332: DRAW_7_WIDE_W_MOD3_0 macro ENTRY_LABEL,ENTRY_INDEX
1333: &ENTRY_LABEL&ENTRY_INDEX&: ;entry point label for this copy of the body
1334: mov [edi],eax ;bytes 0-3 (dword)
1335: mov [edi+4],ax ;bytes 4-5 (word)
1336: mov [edi+6],al ;byte 6
1337: add edi,ecx ;point to the next scan line
1338: endm ;-----------------------------------;
1339:
1340: ; 7-wide write-only, starting at MOD 4 == 0. Entry: EAX = pixel mask, EBX = unrolled loop count, ECX = scan line width in bytes, EDI = start offset.
1341:
1342: align 4
1343: draw_7_wide_w_mod3_0_loop proc near
1344: UNROLL_LOOP DRAW_7_WIDE_W_MOD3_0,W7_MOD3_0,LOOP_UNROLL_COUNT ;unrolled body with W7_MOD3_0 entry labels (UNROLL_LOOP is defined elsewhere)
1345: dec ebx ;one pass through the unrolled body done
1346: jnz draw_7_wide_w_mod3_0_loop ;repeat from the top while passes remain
1347:
1348: ret ;EBX reached 0: finished
1349:
1350: draw_7_wide_w_mod3_0_loop endp
1351:
1352:
1353: ;-----------------------------------------------------------------------;
1354: ; Macro to draw seven write-only bytes at [EDI], then advance EDI by ECX to the next scan line.
1355: ; Optimized for start address MOD 4 == 1 (the "MOD3" in the names means address AND 3).
1356:
1357: DRAW_7_WIDE_W_MOD3_1 macro ENTRY_LABEL,ENTRY_INDEX
1358: &ENTRY_LABEL&ENTRY_INDEX&: ;entry point label for this copy of the body
1359: mov [edi],al ;byte 0
1360: mov [edi+1],ax ;bytes 1-2 (word)
1361: mov [edi+3],eax ;bytes 3-6 (dword, on the next dword boundary)
1362: add edi,ecx ;point to the next scan line
1363: endm ;-----------------------------------;
1364:
1365: ; 7-wide write-only, starting at MOD 4 == 1. Entry: EAX = pixel mask, EBX = unrolled loop count, ECX = scan line width in bytes, EDI = start offset.
1366:
1367: align 4
1368: draw_7_wide_w_mod3_1_loop proc near
1369: UNROLL_LOOP DRAW_7_WIDE_W_MOD3_1,W7_MOD3_1,LOOP_UNROLL_COUNT ;unrolled body with W7_MOD3_1 entry labels (UNROLL_LOOP is defined elsewhere)
1370: dec ebx ;one pass through the unrolled body done
1371: jnz draw_7_wide_w_mod3_1_loop ;repeat from the top while passes remain
1372:
1373: ret ;EBX reached 0: finished
1374:
1375: draw_7_wide_w_mod3_1_loop endp
1376:
1377:
1378: ;-----------------------------------------------------------------------;
1379: ; Macro to draw seven write-only bytes at [EDI], then advance EDI by ECX to the next scan line.
1380: ; Optimized for start address MOD 4 == 2 (the "MOD3" in the names means address AND 3).
1381:
1382: DRAW_7_WIDE_W_MOD3_2 macro ENTRY_LABEL,ENTRY_INDEX
1383: &ENTRY_LABEL&ENTRY_INDEX&: ;entry point label for this copy of the body
1384: mov [edi],ax ;bytes 0-1 (word)
1385: mov [edi+2],eax ;bytes 2-5 (dword, on the next dword boundary)
1386: mov [edi+6],al ;byte 6
1387: add edi,ecx ;point to the next scan line
1388: endm ;-----------------------------------;
1389:
1390: ; 7-wide write-only, starting at MOD 4 == 2. Entry: EAX = pixel mask, EBX = unrolled loop count, ECX = scan line width in bytes, EDI = start offset.
1391:
1392: align 4
1393: draw_7_wide_w_mod3_2_loop proc near
1394: UNROLL_LOOP DRAW_7_WIDE_W_MOD3_2,W7_MOD3_2,LOOP_UNROLL_COUNT ;unrolled body with W7_MOD3_2 entry labels (UNROLL_LOOP is defined elsewhere)
1395: dec ebx ;one pass through the unrolled body done
1396: jnz draw_7_wide_w_mod3_2_loop ;repeat from the top while passes remain
1397:
1398: ret ;EBX reached 0: finished
1399:
1400: draw_7_wide_w_mod3_2_loop endp
1401:
1402:
1403: ;-----------------------------------------------------------------------;
1404: ; Macro to draw seven write-only bytes at [EDI], then advance EDI by ECX to the next scan line.
1405: ; Optimized for start address MOD 4 == 3 (the "MOD3" in the names means address AND 3).
1406:
1407: DRAW_7_WIDE_W_MOD3_3 macro ENTRY_LABEL,ENTRY_INDEX
1408: &ENTRY_LABEL&ENTRY_INDEX&: ;entry point label for this copy of the body
1409: mov [edi],al ;byte 0
1410: mov [edi+1],eax ;bytes 1-4 (dword, on the next dword boundary)
1411: mov [edi+5],ax ;bytes 5-6 (word)
1412: add edi,ecx ;point to the next scan line
1413: endm ;-----------------------------------;
1414:
1415: ; 7-wide write-only, starting at MOD 4 == 3. Entry: EAX = pixel mask, EBX = unrolled loop count, ECX = scan line width in bytes, EDI = start offset.
1416:
1417: align 4
1418: draw_7_wide_w_mod3_3_loop proc near
1419: UNROLL_LOOP DRAW_7_WIDE_W_MOD3_3,W7_MOD3_3,LOOP_UNROLL_COUNT ;unrolled body with W7_MOD3_3 entry labels (UNROLL_LOOP is defined elsewhere)
1420: dec ebx ;one pass through the unrolled body done
1421: jnz draw_7_wide_w_mod3_3_loop ;repeat from the top while passes remain
1422:
1423: ret ;EBX reached 0: finished
1424:
1425: draw_7_wide_w_mod3_3_loop endp
1426:
1427:
1428: ;-----------------------------------------------------------------------;
1429: ; Macro to draw eight write-only bytes at [EDI], then advance EDI by ECX to the next scan line.
1430: ; Optimized for start address MOD 4 == 0 (the "MOD3" in the names means address AND 3).
1431:
1432: DRAW_8_WIDE_W_MOD3_0 macro ENTRY_LABEL,ENTRY_INDEX
1433: &ENTRY_LABEL&ENTRY_INDEX&: ;entry point label for this copy of the body
1434: mov [edi],eax ;bytes 0-3 (dword)
1435: mov [edi+4],eax ;bytes 4-7 (dword)
1436: add edi,ecx ;point to the next scan line
1437: endm ;-----------------------------------;
1438:
1439: ; 8-wide write-only, starting at MOD 4 == 0. Entry: EAX = pixel mask, EBX = unrolled loop count, ECX = scan line width in bytes, EDI = start offset.
1440:
1441: align 4
1442: draw_8_wide_w_mod3_0_loop proc near
1443: UNROLL_LOOP DRAW_8_WIDE_W_MOD3_0,W8_MOD3_0,LOOP_UNROLL_COUNT ;unrolled body with W8_MOD3_0 entry labels (UNROLL_LOOP is defined elsewhere)
1444: dec ebx ;one pass through the unrolled body done
1445: jnz draw_8_wide_w_mod3_0_loop ;repeat from the top while passes remain
1446:
1447: ret ;EBX reached 0: finished
1448:
1449: draw_8_wide_w_mod3_0_loop endp
1450:
1451:
1452: ;-----------------------------------------------------------------------;
1453: ; Macro to draw eight write-only bytes at [EDI], then advance EDI by ECX to the next scan line.
1454: ; Optimized for start address MOD 4 == 1 (the "MOD3" in the names means address AND 3).
1455:
1456: DRAW_8_WIDE_W_MOD3_1 macro ENTRY_LABEL,ENTRY_INDEX
1457: &ENTRY_LABEL&ENTRY_INDEX&: ;entry point label for this copy of the body
1458: mov [edi],al ;byte 0
1459: mov [edi+1],ax ;bytes 1-2 (word)
1460: mov [edi+3],eax ;bytes 3-6 (dword, on the next dword boundary)
1461: mov [edi+7],al ;byte 7
1462: add edi,ecx ;point to the next scan line
1463: endm ;-----------------------------------;
1464:
1465: ; 8-wide write-only, starting at MOD 4 == 1. Entry: EAX = pixel mask, EBX = unrolled loop count, ECX = scan line width in bytes, EDI = start offset.
1466:
1467: align 4
1468: draw_8_wide_w_mod3_1_loop proc near
1469: UNROLL_LOOP DRAW_8_WIDE_W_MOD3_1,W8_MOD3_1,LOOP_UNROLL_COUNT ;unrolled body with W8_MOD3_1 entry labels (UNROLL_LOOP is defined elsewhere)
1470: dec ebx ;one pass through the unrolled body done
1471: jnz draw_8_wide_w_mod3_1_loop ;repeat from the top while passes remain
1472:
1473: ret ;EBX reached 0: finished
1474:
1475: draw_8_wide_w_mod3_1_loop endp
1476:
1477:
1478: ;-----------------------------------------------------------------------;
1479: ; Macro to draw eight write-only bytes at [EDI], then advance EDI by ECX to the next scan line.
1480: ; Optimized for start address MOD 4 == 2 (the "MOD3" in the names means address AND 3).
1481:
1482: DRAW_8_WIDE_W_MOD3_2 macro ENTRY_LABEL,ENTRY_INDEX
1483: &ENTRY_LABEL&ENTRY_INDEX&: ;entry point label for this copy of the body
1484: mov [edi],ax ;bytes 0-1 (word)
1485: mov [edi+2],eax ;bytes 2-5 (dword, on the next dword boundary)
1486: mov [edi+6],ax ;bytes 6-7 (word)
1487: add edi,ecx ;point to the next scan line
1488: endm ;-----------------------------------;
1489:
1490: ; 8-wide write-only, starting at MOD 4 == 2. Entry: EAX = pixel mask, EBX = unrolled loop count, ECX = scan line width in bytes, EDI = start offset.
1491:
1492: align 4
1493: draw_8_wide_w_mod3_2_loop proc near
1494: UNROLL_LOOP DRAW_8_WIDE_W_MOD3_2,W8_MOD3_2,LOOP_UNROLL_COUNT ;unrolled body with W8_MOD3_2 entry labels (UNROLL_LOOP is defined elsewhere)
1495: dec ebx ;one pass through the unrolled body done
1496: jnz draw_8_wide_w_mod3_2_loop ;repeat from the top while passes remain
1497:
1498: ret ;EBX reached 0: finished
1499:
1500: draw_8_wide_w_mod3_2_loop endp
1501:
1502:
1503: ;-----------------------------------------------------------------------;
1504: ; Macro to draw eight write-only bytes at [EDI], then advance EDI by ECX to the next scan line.
1505: ; Optimized for start address MOD 4 == 3 (the "MOD3" in the names means address AND 3).
1506:
1507: DRAW_8_WIDE_W_MOD3_3 macro ENTRY_LABEL,ENTRY_INDEX
1508: &ENTRY_LABEL&ENTRY_INDEX&: ;entry point label for this copy of the body
1509: mov [edi],al ;byte 0
1510: mov [edi+1],eax ;bytes 1-4 (dword, on the next dword boundary)
1511: mov [edi+5],ax ;bytes 5-6 (word)
1512: mov [edi+7],al ;byte 7
1513: add edi,ecx ;point to the next scan line
1514: endm ;-----------------------------------;
1515:
1516: ; 8-wide write-only, starting at MOD 4 == 3. Entry: EAX = pixel mask, EBX = unrolled loop count, ECX = scan line width in bytes, EDI = start offset.
1517:
1518: align 4
1519: draw_8_wide_w_mod3_3_loop proc near
1520: UNROLL_LOOP DRAW_8_WIDE_W_MOD3_3,W8_MOD3_3,LOOP_UNROLL_COUNT ;unrolled body with W8_MOD3_3 entry labels (UNROLL_LOOP is defined elsewhere)
1521: dec ebx ;one pass through the unrolled body done
1522: jnz draw_8_wide_w_mod3_3_loop ;repeat from the top while passes remain
1523:
1524: ret ;EBX reached 0: finished
1525:
1526: draw_8_wide_w_mod3_3_loop endp
1527:
1528: ;-----------------------------------------------------------------------;
1529: ; Unrolled loop stuff for wide replace-type rops (arbitrary width).
1530: ;-----------------------------------------------------------------------;
1531:
1532: ; Tables of entry points into unrolled wide write-only loops. The two digits in each name are the leading and trailing byte counts around the whole dwords (e.g. W_13 = 1 leading byte, 3 trailing bytes).
1533: UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW00Entry,W_00,LOOP_UNROLL_COUNT ;0 leading bytes
1534: UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW01Entry,W_01,LOOP_UNROLL_COUNT
1535: UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW02Entry,W_02,LOOP_UNROLL_COUNT
1536: UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW03Entry,W_03,LOOP_UNROLL_COUNT
1537: UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW10Entry,W_10,LOOP_UNROLL_COUNT ;1 leading byte
1538: UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW11Entry,W_11,LOOP_UNROLL_COUNT
1539: UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW12Entry,W_12,LOOP_UNROLL_COUNT
1540: UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW13Entry,W_13,LOOP_UNROLL_COUNT
1541: UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW20Entry,W_20,LOOP_UNROLL_COUNT ;2 leading bytes
1542: UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW21Entry,W_21,LOOP_UNROLL_COUNT
1543: UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW22Entry,W_22,LOOP_UNROLL_COUNT
1544: UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW23Entry,W_23,LOOP_UNROLL_COUNT
1545: UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW30Entry,W_30,LOOP_UNROLL_COUNT ;3 leading bytes (presumably; those macros are not shown here)
1546: UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW31Entry,W_31,LOOP_UNROLL_COUNT
1547: UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW32Entry,W_32,LOOP_UNROLL_COUNT
1548: UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW33Entry,W_33,LOOP_UNROLL_COUNT
1549:
1550: ;-----------------------------------------------------------------------;
1551: ; Macro to draw n write-only bytes, 0 leading bytes, 0 trailing bytes (ESI whole dwords only),
1552: ; then advance EDI by EDX to the next scan line. ECX is overwritten with the dword count.
1553:
1554: DRAW_WIDE_W_00 macro ENTRY_LABEL,ENTRY_INDEX
1555: &ENTRY_LABEL&ENTRY_INDEX&: ;entry point label for this copy of the body
1556: mov ecx,esi ;# of whole dwords
1557: rep stosd ;fill all whole bytes as dwords
1558: add edi,edx ;point to the next scan line
1559: endm ;-----------------------------------;
1560:
1561: ; N-wide write-only, 0 leading bytes, 0 trailing bytes.
1562: ; EAX = value stored by STOSD (original note said 0ffffh; presumably 0ffffffffh since whole dwords are stored -- TODO confirm)
1563: ; EBX = loop count: decremented once per pass through the unrolled body (originally described as count of scans to fill)
1564: ; EDX = offset from end of one scan's fill to start of next
1565: ; ESI = # of dwords to fill
1566: ; EDI = target address to fill
1567:
1568: align 4
1569: draw_wide_w_00_loop proc near
1570: UNROLL_LOOP DRAW_WIDE_W_00,W_00,LOOP_UNROLL_COUNT ;unrolled body with W_00 entry labels (UNROLL_LOOP is defined elsewhere)
1571: dec ebx ;one pass through the unrolled body done
1572: jnz draw_wide_w_00_loop ;repeat from the top while passes remain
1573:
1574: ret ;EBX reached 0: finished
1575:
1576: draw_wide_w_00_loop endp
1577:
1578:
1579: ;-----------------------------------------------------------------------;
1580: ; Macro to draw n write-only bytes, 0 leading bytes, ESI whole dwords, 1 trailing byte,
1581: ; then advance EDI by EDX to the next scan line. ECX is overwritten with the dword count.
1582:
1583: DRAW_WIDE_W_01 macro ENTRY_LABEL,ENTRY_INDEX
1584: &ENTRY_LABEL&ENTRY_INDEX&: ;entry point label for this copy of the body
1585: mov ecx,esi ;# of whole dwords
1586: rep stosd ;fill whole bytes as dwords
1587: stosb ;fill the trailing byte
1588: add edi,edx ;point to the next scan line
1589: endm ;-----------------------------------;
1590:
1591: ; N-wide write-only, 0 leading bytes, 1 trailing byte.
1592: ; EAX = value stored by STOSD/STOSB (not a count; the dword count is in ESI)
1593: ; EBX = loop count: decremented once per pass through the unrolled body (originally described as count of scans to fill)
1594: ; EDX = offset from end of one scan's fill to start of next
1595: ; ESI = # of dwords to fill
1596: ; EDI = target address to fill
1597:
1598: align 4
1599: draw_wide_w_01_loop proc near
1600: UNROLL_LOOP DRAW_WIDE_W_01,W_01,LOOP_UNROLL_COUNT ;unrolled body with W_01 entry labels (UNROLL_LOOP is defined elsewhere)
1601: dec ebx ;one pass through the unrolled body done
1602: jnz draw_wide_w_01_loop ;repeat from the top while passes remain
1603:
1604: ret ;EBX reached 0: finished
1605:
1606: draw_wide_w_01_loop endp
1607:
1608:
1609: ;-----------------------------------------------------------------------;
1610: ; Macro to draw n write-only bytes, 0 leading bytes, ESI whole dwords, 2 trailing bytes,
1611: ; then advance EDI by EDX to the next scan line. ECX is overwritten with the dword count.
1612:
1613: DRAW_WIDE_W_02 macro ENTRY_LABEL,ENTRY_INDEX
1614: &ENTRY_LABEL&ENTRY_INDEX&: ;entry point label for this copy of the body
1615: mov ecx,esi ;# of whole dwords
1616: rep stosd ;fill whole bytes as dwords
1617: stosw ;fill the trailing word
1618: add edi,edx ;point to the next scan line
1619: endm ;-----------------------------------;
1620:
1621: ; N-wide write-only, 0 leading bytes, 2 trailing bytes.
1622: ; EAX = value stored by STOSD/STOSW (not a count; the dword count is in ESI)
1623: ; EBX = loop count: decremented once per pass through the unrolled body (originally described as count of scans to fill)
1624: ; EDX = offset from end of one scan's fill to start of next
1625: ; ESI = # of dwords to fill
1626: ; EDI = target address to fill
1627:
1628: align 4
1629: draw_wide_w_02_loop proc near
1630: UNROLL_LOOP DRAW_WIDE_W_02,W_02,LOOP_UNROLL_COUNT ;unrolled body with W_02 entry labels (UNROLL_LOOP is defined elsewhere)
1631: dec ebx ;one pass through the unrolled body done
1632: jnz draw_wide_w_02_loop ;repeat from the top while passes remain
1633:
1634: ret ;EBX reached 0: finished
1635:
1636: draw_wide_w_02_loop endp
1637:
1638:
1639: ;-----------------------------------------------------------------------;
1640: ; Macro to draw n write-only bytes, 0 leading bytes, ESI whole dwords, 3 trailing bytes,
1641: ; then advance EDI by EDX to the next scan line. ECX is overwritten with the dword count.
1642:
1643: DRAW_WIDE_W_03 macro ENTRY_LABEL,ENTRY_INDEX
1644: &ENTRY_LABEL&ENTRY_INDEX&: ;entry point label for this copy of the body
1645: mov ecx,esi ;# of whole dwords
1646: rep stosd ;fill whole bytes as dwords
1647: stosw ;fill the trailing word
1648: stosb ;fill the trailing byte
1649: add edi,edx ;point to the next scan line
1650: endm ;-----------------------------------;
1651:
1652: ; N-wide write-only, 0 leading bytes, 3 trailing bytes.
1653: ; EAX = value stored by STOSD/STOSW/STOSB (not a count; the dword count is in ESI)
1654: ; EBX = loop count: decremented once per pass through the unrolled body (originally described as count of scans to fill)
1655: ; EDX = offset from end of one scan's fill to start of next
1656: ; ESI = # of dwords to fill
1657: ; EDI = target address to fill
1658:
1659: align 4
1660: draw_wide_w_03_loop proc near
1661: UNROLL_LOOP DRAW_WIDE_W_03,W_03,LOOP_UNROLL_COUNT ;unrolled body with W_03 entry labels (UNROLL_LOOP is defined elsewhere)
1662: dec ebx ;one pass through the unrolled body done
1663: jnz draw_wide_w_03_loop ;repeat from the top while passes remain
1664:
1665: ret ;EBX reached 0: finished
1666:
1667: draw_wide_w_03_loop endp
1668:
1669:
1670: ;-----------------------------------------------------------------------;
1671: ; Macro to draw n write-only bytes, 1 leading byte, ESI whole dwords, 0 trailing bytes,
1672: ; then advance EDI by EDX to the next scan line. ECX is overwritten with the dword count.
1673:
1674: DRAW_WIDE_W_10 macro ENTRY_LABEL,ENTRY_INDEX
1675: &ENTRY_LABEL&ENTRY_INDEX&: ;entry point label for this copy of the body
1676: stosb ;fill the leading byte
1677: mov ecx,esi ;# of whole dwords
1678: rep stosd ;fill all whole bytes as dwords
1679: add edi,edx ;point to the next scan line
1680: endm ;-----------------------------------;
1681:
1682: ; N-wide write-only, 1 leading byte, 0 trailing bytes.
1683: ; EAX = value stored by STOSB/STOSD (not a count; the dword count is in ESI)
1684: ; EBX = loop count: decremented once per pass through the unrolled body (originally described as count of scans to fill)
1685: ; EDX = offset from end of one scan's fill to start of next
1686: ; ESI = # of dwords to fill
1687: ; EDI = target address to fill
1688:
1689: align 4
1690: draw_wide_w_10_loop proc near
1691: UNROLL_LOOP DRAW_WIDE_W_10,W_10,LOOP_UNROLL_COUNT ;unrolled body with W_10 entry labels (UNROLL_LOOP is defined elsewhere)
1692: dec ebx ;one pass through the unrolled body done
1693: jnz draw_wide_w_10_loop ;repeat from the top while passes remain
1694:
1695: ret ;EBX reached 0: finished
1696:
1697: draw_wide_w_10_loop endp
1698:
1699:
1700: ;-----------------------------------------------------------------------;
1701: ; Macro to draw n write-only bytes, 1 leading byte, ESI whole dwords, 1 trailing byte,
1702: ; then advance EDI by EDX to the next scan line. ECX is overwritten with the dword count.
1703:
1704: DRAW_WIDE_W_11 macro ENTRY_LABEL,ENTRY_INDEX
1705: &ENTRY_LABEL&ENTRY_INDEX&: ;entry point label for this copy of the body
1706: stosb ;fill the leading byte
1707: mov ecx,esi ;# of whole dwords
1708: rep stosd ;fill whole bytes as dwords
1709: stosb ;fill the trailing byte
1710: add edi,edx ;point to the next scan line
1711: endm ;-----------------------------------;
1712:
1713: ; N-wide write-only, 1 leading byte, 1 trailing byte.
1714: ; EAX = value stored by STOSB/STOSD (not a count; the dword count is in ESI)
1715: ; EBX = loop count: decremented once per pass through the unrolled body (originally described as count of scans to fill)
1716: ; EDX = offset from end of one scan's fill to start of next
1717: ; ESI = # of dwords to fill
1718: ; EDI = target address to fill
1719:
1720: align 4
1721: draw_wide_w_11_loop proc near
1722: UNROLL_LOOP DRAW_WIDE_W_11,W_11,LOOP_UNROLL_COUNT ;unrolled body with W_11 entry labels (UNROLL_LOOP is defined elsewhere)
1723: dec ebx ;one pass through the unrolled body done
1724: jnz draw_wide_w_11_loop ;repeat from the top while passes remain
1725:
1726: ret ;EBX reached 0: finished
1727:
1728: draw_wide_w_11_loop endp
1729:
1730:
1731: ;-----------------------------------------------------------------------;
1732: ; Macro to draw n write-only bytes, 1 leading byte, ESI whole dwords, 2 trailing bytes,
1733: ; then advance EDI by EDX to the next scan line. ECX is overwritten with the dword count.
1734:
1735: DRAW_WIDE_W_12 macro ENTRY_LABEL,ENTRY_INDEX
1736: &ENTRY_LABEL&ENTRY_INDEX&: ;entry point label for this copy of the body
1737: stosb ;fill the leading byte
1738: mov ecx,esi ;# of whole dwords
1739: rep stosd ;fill whole bytes as dwords
1740: stosw ;fill the trailing word
1741: add edi,edx ;point to the next scan line
1742: endm ;-----------------------------------;
1743:
1744: ; N-wide write-only, 1 leading byte, 2 trailing bytes.
1745: ; EAX = value stored by STOSB/STOSD/STOSW (not a count; the dword count is in ESI)
1746: ; EBX = loop count: decremented once per pass through the unrolled body (originally described as count of scans to fill)
1747: ; EDX = offset from end of one scan's fill to start of next
1748: ; ESI = # of dwords to fill
1749: ; EDI = target address to fill
1750:
1751: align 4
1752: draw_wide_w_12_loop proc near
1753: UNROLL_LOOP DRAW_WIDE_W_12,W_12,LOOP_UNROLL_COUNT ;unrolled body with W_12 entry labels (UNROLL_LOOP is defined elsewhere)
1754: dec ebx ;one pass through the unrolled body done
1755: jnz draw_wide_w_12_loop ;repeat from the top while passes remain
1756:
1757: ret ;EBX reached 0: finished
1758:
1759: draw_wide_w_12_loop endp
1760:
1761:
1762: ;-----------------------------------------------------------------------;
1763: ; Macro to fill one scan write-only: 1 leading byte, ESI whole dwords,
1764: ; 3 trailing bytes, then advance EDI by EDX to the next scan line.
1765:
1766: DRAW_WIDE_W_13 macro ENTRY_LABEL,ENTRY_INDEX
1767: &ENTRY_LABEL&ENTRY_INDEX&:
1768: stosb ;store AL as the 1 leading byte
1769: mov ecx,esi ;reload # of whole dwords (REP consumes ECX)
1770: rep stosd ;store EAX into each whole dword
1771: stosw ;store AX as the first 2 trailing bytes
1772: stosb ;store AL as the 3rd trailing byte
1773: add edi,edx ;point to the start of the next scan's fill
1774: endm ;-----------------------------------;
1775:
1776: ; N-wide write-only, 1 leading byte, 3 trailing bytes.
1777: ; EAX = value the STOS instructions store (AL, AX or EAX by store size)
1778: ; EBX = loop count, decremented once per pass through the UNROLL_LOOP body (NOTE(review): relation to scan count depends on LOOP_UNROLL_COUNT - confirm)
1779: ; EDX = offset from end of one scan's fill to start of next
1780: ; ESI = # of whole dwords to fill per scan (copied to ECX for REP STOSD)
1781: ; EDI = target address to fill (advanced by the stores and by EDX per scan)
1782:
1783: align 4
1784: draw_wide_w_13_loop proc near
1785: UNROLL_LOOP DRAW_WIDE_W_13,W_13,LOOP_UNROLL_COUNT ;scan-fill body (UNROLL_LOOP is defined outside this section)
1786: dec ebx ;count off one pass through the body
1787: jnz draw_wide_w_13_loop ;repeat until the loop count reaches zero
1788:
1789: ret
1790:
1791: draw_wide_w_13_loop endp
1792:
1793:
1794: ;-----------------------------------------------------------------------;
1795: ; Macro to fill one scan write-only: 2 leading bytes, ESI whole dwords,
1796: ; 0 trailing bytes, then advance EDI by EDX to the next scan line.
1797:
1798: DRAW_WIDE_W_20 macro ENTRY_LABEL,ENTRY_INDEX
1799: &ENTRY_LABEL&ENTRY_INDEX&:
1800: stosw ;store AX as the 2 leading bytes
1801: mov ecx,esi ;reload # of whole dwords (REP consumes ECX)
1802: rep stosd ;store EAX into each whole dword
1803: add edi,edx ;point to the start of the next scan's fill
1804: endm ;-----------------------------------;
1805:
1806: ; N-wide write-only, 2 leading bytes, 0 trailing bytes.
1807: ; EAX = value the STOS instructions store (AX for the word, EAX for dwords)
1808: ; EBX = loop count, decremented once per pass through the UNROLL_LOOP body (NOTE(review): relation to scan count depends on LOOP_UNROLL_COUNT - confirm)
1809: ; EDX = offset from end of one scan's fill to start of next
1810: ; ESI = # of whole dwords to fill per scan (copied to ECX for REP STOSD)
1811: ; EDI = target address to fill (advanced by the stores and by EDX per scan)
1812:
1813: align 4
1814: draw_wide_w_20_loop proc near
1815: UNROLL_LOOP DRAW_WIDE_W_20,W_20,LOOP_UNROLL_COUNT ;scan-fill body (UNROLL_LOOP is defined outside this section)
1816: dec ebx ;count off one pass through the body
1817: jnz draw_wide_w_20_loop ;repeat until the loop count reaches zero
1818:
1819: ret
1820:
1821: draw_wide_w_20_loop endp
1822:
1823:
1824: ;-----------------------------------------------------------------------;
1825: ; Macro to fill one scan write-only: 2 leading bytes, ESI whole dwords,
1826: ; 1 trailing byte, then advance EDI by EDX to the next scan line.
1827:
1828: DRAW_WIDE_W_21 macro ENTRY_LABEL,ENTRY_INDEX
1829: &ENTRY_LABEL&ENTRY_INDEX&:
1830: stosw ;store AX as the 2 leading bytes
1831: mov ecx,esi ;reload # of whole dwords (REP consumes ECX)
1832: rep stosd ;store EAX into each whole dword
1833: stosb ;store AL as the 1 trailing byte
1834: add edi,edx ;point to the start of the next scan's fill
1835: endm ;-----------------------------------;
1836:
1837: ; N-wide write-only, 2 leading bytes, 1 trailing byte.
1838: ; EAX = value the STOS instructions store (AL, AX or EAX by store size)
1839: ; EBX = loop count, decremented once per pass through the UNROLL_LOOP body (NOTE(review): relation to scan count depends on LOOP_UNROLL_COUNT - confirm)
1840: ; EDX = offset from end of one scan's fill to start of next
1841: ; ESI = # of whole dwords to fill per scan (copied to ECX for REP STOSD)
1842: ; EDI = target address to fill (advanced by the stores and by EDX per scan)
1843:
1844: align 4
1845: draw_wide_w_21_loop proc near
1846: UNROLL_LOOP DRAW_WIDE_W_21,W_21,LOOP_UNROLL_COUNT ;scan-fill body (UNROLL_LOOP is defined outside this section)
1847: dec ebx ;count off one pass through the body
1848: jnz draw_wide_w_21_loop ;repeat until the loop count reaches zero
1849:
1850: ret
1851:
1852: draw_wide_w_21_loop endp
1853:
1854:
1855: ;-----------------------------------------------------------------------;
1856: ; Macro to fill one scan write-only: 2 leading bytes, ESI whole dwords,
1857: ; 2 trailing bytes, then advance EDI by EDX to the next scan line.
1858:
1859: DRAW_WIDE_W_22 macro ENTRY_LABEL,ENTRY_INDEX
1860: &ENTRY_LABEL&ENTRY_INDEX&:
1861: stosw ;store AX as the 2 leading bytes
1862: mov ecx,esi ;reload # of whole dwords (REP consumes ECX)
1863: rep stosd ;store EAX into each whole dword
1864: stosw ;store AX as the 2 trailing bytes
1865: add edi,edx ;point to the start of the next scan's fill
1866: endm ;-----------------------------------;
1867:
1868: ; N-wide write-only, 2 leading bytes, 2 trailing bytes.
1869: ; EAX = value the STOS instructions store (AX for words, EAX for dwords)
1870: ; EBX = loop count, decremented once per pass through the UNROLL_LOOP body (NOTE(review): relation to scan count depends on LOOP_UNROLL_COUNT - confirm)
1871: ; EDX = offset from end of one scan's fill to start of next
1872: ; ESI = # of whole dwords to fill per scan (copied to ECX for REP STOSD)
1873: ; EDI = target address to fill (advanced by the stores and by EDX per scan)
1874:
1875: align 4
1876: draw_wide_w_22_loop proc near
1877: UNROLL_LOOP DRAW_WIDE_W_22,W_22,LOOP_UNROLL_COUNT ;scan-fill body (UNROLL_LOOP is defined outside this section)
1878: dec ebx ;count off one pass through the body
1879: jnz draw_wide_w_22_loop ;repeat until the loop count reaches zero
1880:
1881: ret
1882:
1883: draw_wide_w_22_loop endp
1884:
1885:
1886: ;-----------------------------------------------------------------------;
1887: ; Macro to fill one scan write-only: 2 leading bytes, ESI whole dwords,
1888: ; 3 trailing bytes, then advance EDI by EDX to the next scan line.
1889:
1890: DRAW_WIDE_W_23 macro ENTRY_LABEL,ENTRY_INDEX
1891: &ENTRY_LABEL&ENTRY_INDEX&:
1892: stosw ;store AX as the 2 leading bytes
1893: mov ecx,esi ;reload # of whole dwords (REP consumes ECX)
1894: rep stosd ;store EAX into each whole dword
1895: stosw ;store AX as the first 2 trailing bytes
1896: stosb ;store AL as the 3rd trailing byte
1897: add edi,edx ;point to the start of the next scan's fill
1898: endm ;-----------------------------------;
1899:
1900: ; N-wide write-only, 2 leading bytes, 3 trailing bytes.
1901: ; EAX = value the STOS instructions store (AL, AX or EAX by store size)
1902: ; EBX = loop count, decremented once per pass through the UNROLL_LOOP body (NOTE(review): relation to scan count depends on LOOP_UNROLL_COUNT - confirm)
1903: ; EDX = offset from end of one scan's fill to start of next
1904: ; ESI = # of whole dwords to fill per scan (copied to ECX for REP STOSD)
1905: ; EDI = target address to fill (advanced by the stores and by EDX per scan)
1906:
1907: align 4
1908: draw_wide_w_23_loop proc near
1909: UNROLL_LOOP DRAW_WIDE_W_23,W_23,LOOP_UNROLL_COUNT ;scan-fill body (UNROLL_LOOP is defined outside this section)
1910: dec ebx ;count off one pass through the body
1911: jnz draw_wide_w_23_loop ;repeat until the loop count reaches zero
1912:
1913: ret
1914:
1915: draw_wide_w_23_loop endp
1916:
1917:
1918: ;-----------------------------------------------------------------------;
1919: ; Macro to fill one scan write-only: 3 leading bytes, ESI whole dwords,
1920: ; 0 trailing bytes, then advance EDI by EDX to the next scan line.
1921:
1922: DRAW_WIDE_W_30 macro ENTRY_LABEL,ENTRY_INDEX
1923: &ENTRY_LABEL&ENTRY_INDEX&:
1924: stosb ;store AL as the 1st leading byte
1925: stosw ;store AX as the other 2 leading bytes
1926: mov ecx,esi ;reload # of whole dwords (REP consumes ECX)
1927: rep stosd ;store EAX into each whole dword
1928: add edi,edx ;point to the start of the next scan's fill
1929: endm ;-----------------------------------;
1930:
1931: ; N-wide write-only, 3 leading bytes, 0 trailing bytes.
1932: ; EAX = value the STOS instructions store (AL, AX or EAX by store size)
1933: ; EBX = loop count, decremented once per pass through the UNROLL_LOOP body (NOTE(review): relation to scan count depends on LOOP_UNROLL_COUNT - confirm)
1934: ; EDX = offset from end of one scan's fill to start of next
1935: ; ESI = # of whole dwords to fill per scan (copied to ECX for REP STOSD)
1936: ; EDI = target address to fill (advanced by the stores and by EDX per scan)
1937:
1938: align 4
1939: draw_wide_w_30_loop proc near
1940: UNROLL_LOOP DRAW_WIDE_W_30,W_30,LOOP_UNROLL_COUNT ;scan-fill body (UNROLL_LOOP is defined outside this section)
1941: dec ebx ;count off one pass through the body
1942: jnz draw_wide_w_30_loop ;repeat until the loop count reaches zero
1943:
1944: ret
1945:
1946: draw_wide_w_30_loop endp
1947:
1948:
1949: ;-----------------------------------------------------------------------;
1950: ; Macro to fill one scan write-only: 3 leading bytes, ESI whole dwords,
1951: ; 1 trailing byte, then advance EDI by EDX to the next scan line.
1952:
1953: DRAW_WIDE_W_31 macro ENTRY_LABEL,ENTRY_INDEX
1954: &ENTRY_LABEL&ENTRY_INDEX&:
1955: stosb ;store AL as the 1st leading byte
1956: stosw ;store AX as the other 2 leading bytes
1957: mov ecx,esi ;reload # of whole dwords (REP consumes ECX)
1958: rep stosd ;store EAX into each whole dword
1959: stosb ;store AL as the 1 trailing byte
1960: add edi,edx ;point to the start of the next scan's fill
1961: endm ;-----------------------------------;
1962:
1963: ; N-wide write-only, 3 leading bytes, 1 trailing byte.
1964: ; EAX = value the STOS instructions store (AL, AX or EAX by store size)
1965: ; EBX = loop count, decremented once per pass through the UNROLL_LOOP body (NOTE(review): relation to scan count depends on LOOP_UNROLL_COUNT - confirm)
1966: ; EDX = offset from end of one scan's fill to start of next
1967: ; ESI = # of whole dwords to fill per scan (copied to ECX for REP STOSD)
1968: ; EDI = target address to fill (advanced by the stores and by EDX per scan)
1969:
1970: align 4
1971: draw_wide_w_31_loop proc near
1972: UNROLL_LOOP DRAW_WIDE_W_31,W_31,LOOP_UNROLL_COUNT ;scan-fill body (UNROLL_LOOP is defined outside this section)
1973: dec ebx ;count off one pass through the body
1974: jnz draw_wide_w_31_loop ;repeat until the loop count reaches zero
1975:
1976: ret
1977:
1978: draw_wide_w_31_loop endp
1979:
1980:
1981: ;-----------------------------------------------------------------------;
1982: ; Macro to fill one scan write-only: 3 leading bytes, ESI whole dwords,
1983: ; 2 trailing bytes, then advance EDI by EDX to the next scan line.
1984:
1985: DRAW_WIDE_W_32 macro ENTRY_LABEL,ENTRY_INDEX
1986: &ENTRY_LABEL&ENTRY_INDEX&:
1987: stosb ;store AL as the 1st leading byte
1988: stosw ;store AX as the other 2 leading bytes
1989: mov ecx,esi ;reload # of whole dwords (REP consumes ECX)
1990: rep stosd ;store EAX into each whole dword
1991: stosw ;store AX as the 2 trailing bytes
1992: add edi,edx ;point to the start of the next scan's fill
1993: endm ;-----------------------------------;
1994:
1995: ; N-wide write-only, 3 leading bytes, 2 trailing bytes.
1996: ; EAX = value the STOS instructions store (AL, AX or EAX by store size)
1997: ; EBX = loop count, decremented once per pass through the UNROLL_LOOP body (NOTE(review): relation to scan count depends on LOOP_UNROLL_COUNT - confirm)
1998: ; EDX = offset from end of one scan's fill to start of next
1999: ; ESI = # of whole dwords to fill per scan (copied to ECX for REP STOSD)
2000: ; EDI = target address to fill (advanced by the stores and by EDX per scan)
2001:
2002: align 4
2003: draw_wide_w_32_loop proc near
2004: UNROLL_LOOP DRAW_WIDE_W_32,W_32,LOOP_UNROLL_COUNT ;scan-fill body (UNROLL_LOOP is defined outside this section)
2005: dec ebx ;count off one pass through the body
2006: jnz draw_wide_w_32_loop ;repeat until the loop count reaches zero
2007:
2008: ret
2009:
2010: draw_wide_w_32_loop endp
2011:
2012:
2013: ;-----------------------------------------------------------------------;
2014: ; Macro to fill one scan write-only: 3 leading bytes, ESI whole dwords,
2015: ; 3 trailing bytes, then advance EDI by EDX to the next scan line.
2016:
2017: DRAW_WIDE_W_33 macro ENTRY_LABEL,ENTRY_INDEX
2018: &ENTRY_LABEL&ENTRY_INDEX&:
2019: stosb ;store AL as the 1st leading byte
2020: stosw ;store AX as the other 2 leading bytes
2021: mov ecx,esi ;reload # of whole dwords (REP consumes ECX)
2022: rep stosd ;store EAX into each whole dword
2023: stosw ;store AX as the first 2 trailing bytes
2024: stosb ;store AL as the 3rd trailing byte
2025: add edi,edx ;point to the start of the next scan's fill
2026: endm ;-----------------------------------;
2027:
2028: ; N-wide write-only, 3 leading bytes, 3 trailing bytes.
2029: ; EAX = value the STOS instructions store (AL, AX or EAX by store size)
2030: ; EBX = loop count, decremented once per pass through the UNROLL_LOOP body (NOTE(review): relation to scan count depends on LOOP_UNROLL_COUNT - confirm)
2031: ; EDX = offset from end of one scan's fill to start of next
2032: ; ESI = # of whole dwords to fill per scan (copied to ECX for REP STOSD)
2033: ; EDI = target address to fill (advanced by the stores and by EDX per scan)
2034:
2035: align 4
2036: draw_wide_w_33_loop proc near
2037: UNROLL_LOOP DRAW_WIDE_W_33,W_33,LOOP_UNROLL_COUNT ;scan-fill body (UNROLL_LOOP is defined outside this section)
2038: dec ebx ;count off one pass through the body
2039: jnz draw_wide_w_33_loop ;repeat until the loop count reaches zero
2040:
2041: ret
2042:
2043: draw_wide_w_33_loop endp
2044:
2045:
2046: ;--------------------------Private-Routine------------------------------;
2047: ; comp_byte_interval
2048: ;
2049: ; An interval will be computed for byte boundaries.
2050: ;
2051: ; A first mask and a last mask will be calculated, and possibly
2052: ; combined into the inner loop count. A mask that covers its whole byte
2053: ; is returned as 0 and that byte is counted in the inner loop instead.
2054: ;
2055: ; Entry:
2056: ; EBX = right coordinate (exclusive)
2057: ; EDX = left coordinate (inclusive)
2058: ; Returns:
2059: ; EDI = offset to first byte to be altered in the scan (left coordinate / 8)
2060: ; ESI = inner loop count (# of bytes that are altered in full)
2061: ; AL = first byte mask (possibly 0)
2062: ; AH = last byte mask (possibly 0)
2063: ; Error Returns:
2064: ; None
2065: ; Registers Preserved:
2066: ; ES,BP
2067: ; Registers Destroyed:
2068: ; EAX,EBX,CL,EDX,ESI,EDI,FLAGS
2069: ; Calls:
2070: ; None
2071: ; History:
2072: ; Sat 11-Apr-1987 20:39:10 -by- Walt Moore [waltm]
2073: ; Created.
2074: ;-----------------------------------------------------------------------;
2075:
2076: cProc comp_byte_interval
2077:
2078: sub ebx,edx ;Compute extent of interval
2079: dec ebx ;Make interval inclusive
2080: mov edi,edx ;Don't destroy starting X
2081: shr edi,3 ;/8 for byte address
2082:
2083: and edx,00000111b ;Compute bit index for left side
2084: add ebx,edx ;Compute bit index for right side
2085: mov esi,ebx ;(save for inner loop count)
2086: and ebx,00000111b ;Bit index of last pixel within its byte
2087: mov cl,dl ;Compute left side altered bits mask
2088: mov eax,0FFFFFFFFh ;All ones, so AL starts as 0FFh
2089: mov edx,eax ;EDX = -1, DL = 0FFh for the full-mask tests below
2090: shr al,cl ;AL = left side altered bits mask
2091: mov cl,bl ;Compute right side altered bits mask
2092: mov ah,80h ;Top bit set; SAR replicates it rightward
2093: sar ah,cl ;AH = right side altered bits mask (top CL+1 bits)
2094: shr esi,3 ;Index of last byte relative to first byte
2095: jnz short comp_byte_dont_combine ;More than one byte, keep masks separate
2096:
2097: ; Only one byte will be affected. Combine first/last masks, set loop count = 0
2098:
2099: and al,ah ;Will use first byte mask only
2100: xor ah,ah ;Want last byte mask to be 0
2101: inc esi ;Offsets the DEC below so the count stays 0
2102:
2103: comp_byte_dont_combine:
2104: dec esi ;Dec inner loop count (might become 0)
2105:
2106:
2107: ; If all pixels in the first byte are altered, combine the first byte into the
2108: ; inner loop and clear the first byte mask. Ditto for the last byte mask.
2109:
2110: cmp al,dl ;Set 'C' if not all pixels 1
2111: sbb esi,edx ;If no 'C', sub -1 (add 1), else sub 0
2112: cmp al,dl ;Set 'C' again (the SBB above changed flags)
2113: sbb al,dl ;If no 'C', AL becomes 0, else AL is unchanged
2114:
2115: cmp ah,dl ;Set 'C' if not all pixels 1
2116: sbb esi,edx ;If no 'C', sub -1 (add 1), else sub 0
2117: cmp ah,dl ;Set 'C' again (the SBB above changed flags)
2118: sbb ah,dl ;If no 'C', AH becomes 0, else AH is unchanged
2119: cRet comp_byte_interval
2120:
2121: endProc comp_byte_interval
2122:
2123: _TEXT$01 ends
2124:
2125: end
2126:
2127:
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.