|
|
1.1 root 1: ;---------------------------Module-Header------------------------------;
2: ; Module Name: vgablts.asm
3: ;
4: ; Copyright (c) 1992-1993 Microsoft Corporation
5: ;-----------------------------------------------------------------------;
6: ;-----------------------------------------------------------------------;
7: ; VOID vTrgBlt(PDEV * ppdev, ULONG culRcl, RECTL * prcl, MIX ulMix,
8: ; ULONG ulClr, POINTL * pptlBrush)
9: ; Input:
10: ; ppdev - pointer to PDEV for surface to which to draw
11: ; culRcl - # of rectangles to fill
12: ; prcl - pointer to list of rectangles to fill
13: ; ulMix - mix rop with which to fill
14: ; ulClr - color with which to fill
15: ; pptlBrush - not used
16: ;
17: ; Performs accelerated solid area fills for all mixes.
18: ;
19: ;-----------------------------------------------------------------------;
20: ;
21: ; Note: Assumes all rectangles have positive heights and widths. Will not
22: ; work properly if this is not the case.
23: ;
24: ;-----------------------------------------------------------------------;
25: ;
26: ; Note: Cases where the width of the whole bytes fill is equal to the
27: ; width of the bitmap could be sped up by using a single REP MOVS or REP
28: ; STOS, but how often does WIN32 do a fill that's the width of the screen?
29: ; Not very.
30: ;
31: ;-----------------------------------------------------------------------;
32:
33: comment $
34:
35: The overall approach of this module is to accept a list of rectangles to
36: fill, set up the VGA hardware for the desired fill, and then fill the
37: rectangles one at a time. Each rectangle fill is set up for everything
38: but vertical parameters, and then decomposed into the sections that
39: intersect each VGA bank; each section is drawn in turn. The drawing code
40: is heavily unrolled for performance, and vectors are set up so that the
41: drawing code appropriate for the desired fill is essentially threaded
42: together.
43:
44: commend $
45:
46: ;-----------------------------------------------------------------------;
47:
48: ; Set LOOP_UNROLL_SHIFT to the log2 of the number of times you want loops in
49: ; this module unrolled. For example, LOOP_UNROLL_SHIFT of 3 yields 2**3 = 8
50: ; times unrolling. This is the only thing you need to change to control
51: ; unrolling.
52:
53: LOOP_UNROLL_SHIFT equ 2
54:
55: ;-----------------------------------------------------------------------;
56:
57: .386
58:
59: ifndef DOS_PLATFORM
60: .model small,c
61: else
62: ifdef STD_CALL
63: .model small,c
64: else
65: .model small,pascal
66: endif; STD_CALL
67: endif; DOS_PLATFORM
68:
69: assume cs:FLAT,ds:FLAT,es:FLAT,ss:FLAT
70: assume fs:nothing,gs:nothing
71:
72: .xlist
73: include stdcall.inc ;calling convention cmacros
74: include i386\strucs.inc
75: include i386\driver.inc
76: include i386\egavga.inc
77: include i386\unroll.inc
78: include i386\ropdefs.inc
79:
80: .list
81:
82: ;-----------------------------------------------------------------------;
83:
84: .data
85:
86: ;-----------------------------------------------------------------------;
87: ; Left edge clip masks, indexed by the intrabyte start address (xLeft AND 3).
88: ; Entry 0 is 0ffh, which flags a whole-byte left edge (vTrgBlt tests for it with cmp al,-1).
89: public jLeftMask
90: jLeftMask label byte
91: db 0ffh,0eh,0ch,08h ;start pixel 0 (whole byte),1,2,3
92:
93: ;-----------------------------------------------------------------------;
94: ; Right edge clip masks, indexed by the intrabyte end address (xRight AND 3,
95: ; non-inclusive). Entry 0 is 0ffh, which flags a whole-byte right edge (tested with cmp ah,-1).
96: public jRightMask
97: jRightMask label byte
98: db 0ffh,01h,03h,07h ;end pixel 0 (whole byte),1,2,3
99:
100: ;-----------------------------------------------------------------------;
101: ; Tables used to set up for the desired raster op. Note that entries for raster
102: ; ops that aren't handled here are generally correct, except that they ignore
103: ; need for inversion of the destination, which those rops require.
104:
105: ; AND mask for the drawing color, indexed by mix: 000h forces the color off (R2_BLACK draws 0), 0ffh leaves it unchanged.
106: ; The first entry is ignored; there is no mix 0.
107: public jForceOffTable
108: jForceOffTable db 0 ;mix 0 placeholder
109: db 000h,0ffh,0ffh,0ffh,0ffh,0ffh,0ffh,0ffh ;mixes 1-8
110: db 0ffh,0ffh,000h,0ffh,0ffh,0ffh,0ffh,0ffh ;mixes 9-16; the 000h at mix 11 is presumably R2_NOP, which vTrgBlt rejects before any lookup (confirm in ropdefs.inc)
111:
112: ;-----------------------------------------------------------------------;
113: ; OR mask for the drawing color, indexed by mix: 0ffh forces the color on for R2_NOT (Dn) and R2_WHITE (1), 0 leaves it unchanged.
114: ; The first entry is ignored; there is no mix 0.
115: public jForceOnTable
116: jForceOnTable db 0, 0,0,0,0,0,0ffh,0,0,0,0,0,0,0,0,0,0ffh ;0ffh at mixes 6 and 16
117:
118: ;-----------------------------------------------------------------------;
119: ; XOR mask for the drawing color, indexed by mix: 0ffh inverts the passed-in color for Pn mixes, 0 leaves it unchanged.
120: ; The first entry is ignored; there is no mix 0.
121: public jNotTable
122: jNotTable db 0, 0,0ffh,0ffh,0ffh,0,0,0,0ffh,0,0ffh,0,0ffh,0,0,0,0 ;0ffh at mixes 2,3,4,8,10,12
123:
124: ;-----------------------------------------------------------------------;
125: ; Table of VGA ALU logical functions corresponding to mixes, indexed by mix. Note that Dn is
126: ; handled as a separate preceding inversion pass (see jInvertDest) when part of a more complex
127: ; mix. vTrgBlt writes a non-zero entry as the data byte for GRAF_DATA_ROT and relies on DR_SET being 0 (.errnz DR_SET).
128: ; The first entry is ignored; there is no mix 0.
129: public jALUFuncTable
130: jALUFuncTable db 0 ;mix 0 placeholder
131: db DR_SET,DR_AND,DR_AND,DR_SET ;mixes 1-4
132: db DR_AND,DR_XOR,DR_XOR,DR_OR ;mixes 5-8
133: db DR_AND,DR_XOR, 0,DR_OR ;mixes 9-12; the 0 at mix 11 is presumably R2_NOP, rejected before lookup (confirm)
134: db DR_SET,DR_OR ,DR_OR ,DR_SET ;mixes 13-16
135:
136: ;-----------------------------------------------------------------------;
137: ; Indexed by mix. Entries of 1 mark rops that require two passes, one to invert the destination
138: ; (XOR with all ones) and then another to finish the rop with the function from jALUFuncTable.
139: ; The first entry is ignored; there is no mix 0.
140: public jInvertDest
141: jInvertDest db 0, 0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0 ;1 at mixes 2,5,8,14
142:
143: ;-----------------------------------------------------------------------;
144: ; Table of routines to be called to draw edges, according to which edges are
145: ; partial and which edges are whole bytes. Index bit 0 = 1 if the left edge is partial; bit 1 = 1 if the right edge is a whole byte.
146: align 4
147: pfnEdgeDrawing label dword
148: dd do_right_edge_bytes ;index 0: left whole, right not whole
149: dd do_both_edge_bytes ;index 1: left partial, right not whole
150: dd check_next_bank ;index 2: left whole, right whole
151: dd do_left_edge_bytes ;index 3: left partial, right whole
152:
153: ;-----------------------------------------------------------------------;
154: ; Table of pointers to tables used to find entry points in unrolled wide
155: ; whole byte code. Index = (# of odd leading bytes * 4) + # of odd trailing bytes, as built at is_wide_replace.
156:
157: align 4
158: pfnWideWholeRep label dword
159: dd pfnDrawWideW00Entry ;index 0: 0 leading, 0 trailing
160: dd pfnDrawWideW01Entry ;index 1: 0 leading, 1 trailing
161: dd pfnDrawWideW02Entry ;index 2: 0 leading, 2 trailing
162: dd pfnDrawWideW03Entry ;index 3: 0 leading, 3 trailing
163: dd pfnDrawWideW10Entry ;index 4: 1 leading, 0 trailing
164: dd pfnDrawWideW11Entry ;index 5: 1 leading, 1 trailing
165: dd pfnDrawWideW12Entry ;index 6: 1 leading, 2 trailing
166: dd pfnDrawWideW13Entry ;index 7: 1 leading, 3 trailing
167: dd pfnDrawWideW20Entry ;index 8: 2 leading, 0 trailing
168: dd pfnDrawWideW21Entry ;index 9: 2 leading, 1 trailing
169: dd pfnDrawWideW22Entry ;index 10: 2 leading, 2 trailing
170: dd pfnDrawWideW23Entry ;index 11: 2 leading, 3 trailing
171: dd pfnDrawWideW30Entry ;index 12: 3 leading, 0 trailing
172: dd pfnDrawWideW31Entry ;index 13: 3 leading, 1 trailing
173: dd pfnDrawWideW32Entry ;index 14: 3 leading, 2 trailing
174: dd pfnDrawWideW33Entry ;index 15: 3 leading, 3 trailing
175:
176: ;-----------------------------------------------------------------------;
177: ; Table of pointers to tables used to find entry points in narrow,
178: ; special-cased unrolled non-replace whole byte code. Indexed by whole-byte width.
179:
180: ; Note: The breakpoint where one should switch from special-casing to
181: ; the general wide loop is purely a guess on my part. 5 seemed reasonable.
182:
183: align 4
184: pfnWholeBytesNonReplaceEntries label dword
185: dd 0 ;we never get a 0-wide case
186: dd pfnDraw1WideRWEntry ;1 byte wide
187: dd pfnDraw2WideRWEntry ;2 bytes wide
188: dd pfnDraw3WideRWEntry ;3 bytes wide
189: dd pfnDraw4WideRWEntry ;4 bytes wide
190: MAX_NON_REPLACE_SPECIAL equ ($-pfnWholeBytesNonReplaceEntries)/4 ;# of entries; widths >= this use whole_bytes_non_replace_wide
191:
192: ;-----------------------------------------------------------------------;
193: ; Table of pointers to tables used to find entry points in narrow, special-
194: ; cased unrolled replace whole byte code.
195:
196: ; Note: The breakpoint where one should switch from special-casing to
197: ; REP STOS is purely a guess on my part. 8 seemed reasonable.
198:
199: ; Start address AND 3 (i.e. MOD 4, despite the "Mod3" in the names) is 0. Indexed by whole-byte width.
200: align 4
201: pfnWholeBytesMod0ReplaceEntries label dword
202: dd 0 ;we never get a 0-wide case
203: dd pfnDraw1WideWEntry ;1 byte wide
204: dd pfnDraw2WideWEntry ;2 bytes wide
205: dd pfnDraw3WideWEvenEntry ;3 bytes wide, even start address
206: dd pfnDraw4WideWEntry ;4 bytes wide
207: dd pfnDraw5WideWEvenEntry ;5 bytes wide, even start address
208: dd pfnDraw6WideWMod3_0Entry ;6 bytes wide
209: dd pfnDraw7WideWMod3_0Entry ;7 bytes wide
210: dd pfnDraw8WideWMod3_0Entry ;8 bytes wide
211: MAX_REPLACE_SPECIAL equ ($-pfnWholeBytesMod0ReplaceEntries)/4 ;# of entries; widths >= this use whole_bytes_rep_wide
212:
213: ; Start address AND 3 (i.e. MOD 4, despite the "Mod3" in the names) is 1. Indexed by whole-byte width.
214: align 4
215: pfnWholeBytesMod1ReplaceEntries label dword
216: dd 0 ;we never get a 0-wide case
217: dd pfnDraw1WideWEntry ;1 byte wide
218: dd pfnDraw2WideWEntry ;2 bytes wide
219: dd pfnDraw3WideWOddEntry ;3 bytes wide, odd start address
220: dd pfnDraw4WideWEntry ;4 bytes wide
221: dd pfnDraw5WideWOddEntry ;5 bytes wide, odd start address
222: dd pfnDraw6WideWMod3_1Entry ;6 bytes wide
223: dd pfnDraw7WideWMod3_1Entry ;7 bytes wide
224: dd pfnDraw8WideWMod3_1Entry ;8 bytes wide
225:
226: ; Start address AND 3 (i.e. MOD 4, despite the "Mod3" in the names) is 2. Indexed by whole-byte width.
227: align 4
228: pfnWholeBytesMod2ReplaceEntries label dword
229: dd 0 ;we never get a 0-wide case
230: dd pfnDraw1WideWEntry ;1 byte wide
231: dd pfnDraw2WideWEntry ;2 bytes wide
232: dd pfnDraw3WideWEvenEntry ;3 bytes wide, even start address
233: dd pfnDraw4WideWEntry ;4 bytes wide
234: dd pfnDraw5WideWEvenEntry ;5 bytes wide, even start address
235: dd pfnDraw6WideWMod3_2Entry ;6 bytes wide
236: dd pfnDraw7WideWMod3_2Entry ;7 bytes wide
237: dd pfnDraw8WideWMod3_2Entry ;8 bytes wide
238:
239: ; Start address AND 3 (i.e. MOD 4, despite the "Mod3" in the names) is 3. Indexed by whole-byte width.
240: align 4
241: pfnWholeBytesMod3ReplaceEntries label dword
242: dd 0 ;we never get a 0-wide case
243: dd pfnDraw1WideWEntry ;1 byte wide
244: dd pfnDraw2WideWEntry ;2 bytes wide
245: dd pfnDraw3WideWOddEntry ;3 bytes wide, odd start address
246: dd pfnDraw4WideWEntry ;4 bytes wide
247: dd pfnDraw5WideWOddEntry ;5 bytes wide, odd start address
248: dd pfnDraw6WideWMod3_3Entry ;6 bytes wide; must be the alignment-3 table like the 7- and 8-wide rows
249: dd pfnDraw7WideWMod3_3Entry ;7 bytes wide
250: dd pfnDraw8WideWMod3_3Entry ;8 bytes wide
251:
252: ; Master alignment look-up table, indexed by whole-bytes start address AND 3 (i.e. MOD 4), giving the entry table for each of the four possible
253: ; alignments for narrow, special-cased unrolled replace whole byte code.
254: align 4
255: pfnWholeBytesReplaceMaster label dword
256: dd pfnWholeBytesMod0ReplaceEntries ;start alignment 0
257: dd pfnWholeBytesMod1ReplaceEntries ;start alignment 1
258: dd pfnWholeBytesMod2ReplaceEntries ;start alignment 2
259: dd pfnWholeBytesMod3ReplaceEntries ;start alignment 3
260:
261: ;-----------------------------------------------------------------------;
262:
263: .code
264:
265: ;-----------------------------------------------------------------------;
266:
267: cProc vTrgBlt,24,< \
268: uses esi edi ebx, \
269: ppdev: ptr, \
270: culRcl: dword, \
271: prcl: ptr RECTL, \
272: ulMix: dword, \
273: ulColor: dword, \
274: pptlBrsuh:ptr POINTL >
275:
276: local ulRowOffset :dword ;Offset from start of scan line of
277: ; first byte to fill
278: local ulWholeBytes :dword ;# of whole bytes to fill
279: local ulWholeDwords :dword ;# of whole dwords to fill
280: local pfnWholeFn :dword ;pointer to routine used to draw
281: ; whole bytes
282: local ulScanWidth :dword ;offset from start of one scan to start
283: ; of next
284: local ulNextScan :dword ;offset from end of one scan line's
285: ; fill to start of next
286: local ulCurrentTopScan :dword ;top scan line to fill in current bank
287: local ulMasks :dword ;low byte = left mask, next byte =
288: ; right mask
289: local ulBottomScan :dword ;bottom scan line of fill rectangle
290: local pfnDraw1WideVector :dword ;address at which to enter unrolled
291: ; edge loop
292: local jALUFunc :dword ;VGA ALU logical operation (SET, AND,
293: ; OR, or XOR)
294: local pfnStartDrawing :dword ;pointer to function to call to start
295: ; drawing
296: local pfnContinueDrawing :dword ;pointer to function to call to
297: ; continue drawing after doing whole
298: ; bytes
299: local ulLeftEdgeAdjust :dword ;used to bump the whole bytes start
300: ; address past the left edge when the
301: ; left edge is partial
302: local pfnWholeBytes :dword ;pointer to table of entry points
303: ; into unrolled loops for whole byte
304: ; filling
305: local jInvertDestFirst :dword ;1 if the rop requires a pass to invert
306: ; the destination before the normal
307: ; pass
308: local ulDrawingColor :dword ;color byte with which to fill,
309: ; replicated to a dword
310: local ppfnDrawEdgeTable :dword ;points to table to be used to look up
311: ; unrolled entry points for edge
312: ; bytes (pfnDraw1WideRWEntry or
313: ; pfnDraw1WideWEntry)
314:
315: ;-----------------------------------------------------------------------;
316: ; CLD is assumed on entry.
317: ;-----------------------------------------------------------------------;
318:
319: ;-----------------------------------------------------------------------;
320: ; Make sure there's something to draw; clip enumerations can be empty.
321: ;-----------------------------------------------------------------------;
322:
323: cmp culRcl,0 ;any rects to fill?
324: jz vTrgBlts_done ;no, we're done
325:
326:
327: ;-----------------------------------------------------------------------;
328: ; Set up variables that are constant for the entire time we're in this
329: ; module.
330: ;-----------------------------------------------------------------------;
331:
332: ;-----------------------------------------------------------------------;
333: ; Set up for the desired raster op.
334: ;-----------------------------------------------------------------------;
335:
336: sub ebx,ebx ;ignore any background mix; we're only
337: mov bl,byte ptr ulMix ; concerned with the foreground in this
338: ; module
339: cmp ebx,R2_NOP ;is this NOP?
340: jz vTrgBlts_done ;yes, we're done
341: mov al,jInvertDest[ebx] ;remember whether we need to
342: mov byte ptr jInvertDestFirst,al ; invert the destination before
343: ; finishing the rop
344: mov ah,byte ptr ulColor ;get the drawing color
345: and ah,jForceOffTable[ebx] ;force color to 0 if necessary
346: ; (R2_BLACK)
347: or ah,jForceOnTable[ebx] ;force color to 0ffh if necessary
348: ; (R2_WHITE, R2_NOT)
349: xor ah,jNotTable[ebx] ;invert color if necessary (any Pn mix)
350: ;at this point, AH has the color we
351: ; want to draw with; set up the VGA
352: ; hardware to draw with that color
353: mov al,ah ;replicate the drawing color to a dword
354: mov edx,eax
355: shl eax,16
356: mov ax,dx
357: mov ulDrawingColor,eax ;remember drawing color
358:
359: mov ppfnDrawEdgeTable,offset pfnDraw1WideWEntry
360: ;assume replace-type rop, so we can
361: ; draw edge bytes with the write-
362: ; without-read code pointed to by this
363: ; table
364: mov ah,jALUFuncTable[ebx] ;get the ALU logical function
365: and ah,ah ;is the logical function DR_SET?
366: .errnz DR_SET
367: jz short skip_ALU_set ;yes, don't have to set because that's
368: ; the VGA's default state
369: mov edx,VGA_BASE + GRAF_ADDR
370: mov al,GRAF_DATA_ROT
371: out dx,ax ;set the ALU logical function
372: mov ppfnDrawEdgeTable,offset pfnDraw1WideRWEntry
373: ;draw edge bytes with the code pointed
374: ; to by this table (read/write)
375: skip_ALU_set:
376: mov byte ptr jALUFunc,ah ;remember the ALU logical function
377:
378: ;-----------------------------------------------------------------------;
379: ; Fill the current rectangle with the specified raster op and color.
380: ;-----------------------------------------------------------------------;
381:
382: fill_rect_loop:
383:
384: ;-----------------------------------------------------------------------;
385: ; Set up variables that are constant from bank to bank during a single
386: ; fill.
387: ;-----------------------------------------------------------------------;
388:
389: ;-----------------------------------------------------------------------;
390: ; Set up masks and widths.
391: ;-----------------------------------------------------------------------;
392:
393: mov edi,prcl ;point to rectangle to fill
394: mov eax,[edi].yBottom
395: mov ulBottomScan,eax ;remember the bottom scan line of fill
396:
397: mov ebx,[edi].xRight ;right edge of fill (non-inclusive)
398: mov ecx,ebx
399: and ecx,011b ;intrabyte address of right edge
400: mov ah,jRightMask[ecx] ;right edge mask
401:
402: mov esi,[edi].xLeft ;left edge of fill (inclusive)
403: mov ecx,esi
404: shr ecx,2 ;/4 for start offset from left edge
405: ; of scan line
406: mov ulRowOffset,ecx ;remember offset from start of scan
407: ; line
408: sub ebx,esi ;width in pixels of fill
409:
410: and esi,011b ;intrabyte address of left edge
411: mov al,jLeftMask[esi] ;left edge mask
412:
413: dec ebx ;make inclusive on right
414: add ebx,esi ;inclusive width, starting counting at
415: ; the beginning of the left edge byte
416: shr ebx,2 ;width of fill in bytes touched - 1
417: jnz short more_than_1_byte ;more than 1 byte is involved
418:
419: ; Only one byte will be affected. Combine first/last masks.
420:
421: and al,ah ;we'll use first byte mask only
422: xor ah,ah ;want last byte mask to be 0
423: inc ebx ;so there's one count to subtract below
424: ; if this isn't a whole edge byte
425: more_than_1_byte:
426:
427: ; If all pixels in the left edge are altered, combine the first byte into the
428: ; whole byte count and clear the first byte mask, because we can handle solid
429: ; edge bytes faster as part of the whole bytes. Ditto for the right edge.
430:
431: sub ecx,ecx ;edge whole-status accumulator
432: cmp al,-1 ;is left edge a whole byte or partial?
433: adc ecx,ecx ;ECX=1 if left edge partial, 0 if whole
434: sub ebx,ecx ;if left edge partial, deduct it from
435: ; the whole bytes count
436: mov ulLeftEdgeAdjust,ecx ;for skipping over the left edge if
437: ; it's partial when pointing to the
438: ; whole bytes
439: and ah,ah ;is right edge mask 0, meaning this
440: ; fill is only 1 byte wide?
441: jz short save_masks ;yes, no need to do anything
442: cmp ah,-1 ;is right edge a whole byte or partial?
443: jnz short save_masks ;partial
444: add ecx,2 ;bit 1 of ECX=0 if right edge partial,
445: ; 1 if whole;
446: ;bit 0=1 if left edge partial, 0 whole
447: inc ebx ;if right edge whole, include it in the
448: ; whole bytes count
449: save_masks:
450: mov ulMasks,eax ;save left and right clip masks
451: mov ulWholeBytes,ebx ;save # of whole bytes
452:
453: mov ecx,pfnEdgeDrawing[ecx*4] ;set address of routine to draw
454: mov pfnContinueDrawing,ecx ; all partial (non-whole) edges
455:
456: and ebx,ebx ;any whole bytes?
457: jz short start_vec_set ;no
458: ;yes, so draw the whole bytes before
459: ; the edge bytes
460:
461: ; The whole bytes loop depends on the type of operation being done. If the
462: ; operation is one which uses DR_SET, then we can use a STOS-type operation,
463: ; else we have to use a MOVSB-type operation (to load the latches with the
464: ; existing contents of display memory to allow the ALUs to work).
465:
466: cmp byte ptr jALUFunc,DR_SET ;is it a replace-type rop?
467: jz short is_replace_type ;yes
468: ;no, set up for non-replace whole bytes
469: mov ecx,offset whole_bytes_non_replace_wide
470: ;assume too wide to special-case
471: cmp ebx,MAX_NON_REPLACE_SPECIAL ;too wide to special case?
472: jnb short start_vec_set ;yes
473: mov ecx,pfnWholeBytesNonReplaceEntries[ebx*4] ;no, point to entry
474: mov pfnWholeBytes,ecx ; table for width
475: mov ecx,offset whole_bytes_special
476: ;set up to call special routine to fill
477: ; whole bytes
478: jmp short start_vec_set
479:
480: align 4
481: is_replace_type: ;set up for replace-type rop
482: cmp ebx,MAX_REPLACE_SPECIAL ;too wide to special case?
483: jnb short is_wide_replace ;yes
484: ;narrow enough to special case. Look up
485: ; the entry table for the special case
486: ; base on the start alignment
487: mov ecx,ulRowOffset
488: add ecx,ulLeftEdgeAdjust ;left edge whole bytes start offset
489: and ecx,011b ;left edge whole bytes start alignment
490: ; MOD 4 (low two address bits)
491: mov ecx,pfnWholeBytesReplaceMaster[ecx*4] ;look up table of entry
492: ; tables for alignment
493: mov ecx,[ecx+ebx*4] ;look up entry table for width
494: mov pfnWholeBytes,ecx ; table for width
495: mov ecx,offset whole_bytes_special
496: ;set up to call special routine to fill
497: ; whole bytes
498: jmp short start_vec_set
499:
500: align 4
501: is_wide_replace: ;set up for wide replace-type op
502: ;Note: assumes there is at least one
503: ; full dword involved!
504: mov ecx,ulRowOffset
505: add ecx,ulLeftEdgeAdjust ;left edge whole bytes start offset
506: neg ecx
507: and ecx,011b
508: mov edx,ebx
509: sub edx,ecx ;ignore odd leading bytes
510: mov eax,edx
511: shr edx,2 ;# of whole dwords across (not counting
512: ; odd leading & trailing bytes)
513: mov ulWholeDwords,edx
514: and eax,011b ;# of odd (fractional) trailing bytes
515: shl ecx,2
516: or ecx,eax ;build a look-up index from the number
517: ; of leading and trailing bytes
518: mov ecx,pfnWideWholeRep[ecx*4] ;proper drawing handler for front/
519: mov pfnWholeBytes,ecx ; back alignment
520: mov ecx,offset whole_bytes_rep_wide
521: ;set up to call routine to perform wide
522: ; whole bytes fill
523: start_vec_set:
524: mov pfnStartDrawing,ecx ;set address of routine with which to start drawing
525:
526: mov ecx,ppdev
527: mov eax,[ecx].pdev_lPlanarNextScan
528: mov ulScanWidth,eax ;local copy of scan line width
529: sub eax,ebx ;EAX = delta to next scan
530: mov ulNextScan,eax
531:
532:
533: ;-----------------------------------------------------------------------;
534: ; Fill this rectangle.
535: ;-----------------------------------------------------------------------;
536:
537: cmp byte ptr jInvertDestFirst,1
538: ;is this an invert-dest-plus-something-
539: ; else rop that requires two passes?
540: jz short do_invert_dest_rop ;yes, special case with two passes
541:
542: do_single_pass:
543: call draw_banks
544:
545:
546: ;-----------------------------------------------------------------------;
547: ; See if there are any more rectangles to fill.
548: ;-----------------------------------------------------------------------;
549:
550: add prcl,(size RECTL) ;point to the next rectangle, if there is one
551: dec culRcl ;count down the rectangles to fill
552: jnz fill_rect_loop
553:
554:
555: ;-----------------------------------------------------------------------;
556: ; We have filled all rectangles. Restore the VGA to its default state.
557: ;-----------------------------------------------------------------------;
558:
559: cmp byte ptr jALUfunc,DR_SET ;is the logical function already SET?
560: jnz short @F ;no, need to reset it
561: cRet vTrgBlt ;yes, no need to reset it
562:
563: align 4
564: @@:
565: mov edx,VGA_BASE + GRAF_ADDR
566: mov eax,(DR_SET shl 8) + GRAF_DATA_ROT ;set the logical function to
567: out dx,ax ; SET
568: vTrgBlts_done:
569: cRet vTrgBlt
570:
571:
572: ;-----------------------------------------------------------------------;
573: ; Handles rops that require two passes, the first being a destination
574: ; inversion pass.
575: ;-----------------------------------------------------------------------;
576:
577: align 4
578: do_invert_dest_rop:
579:
580: ; Set up the VGA's hardware for inversion
581:
582: mov eax,ulDrawingColor ;remember the normal drawing color
583: push eax
584: mov ulDrawingColor,-1 ;with XOR, this flips all bits
585:
586: mov edx,VGA_BASE + GRAF_ADDR
587: mov eax,(DR_XOR shl 8) + GRAF_DATA_ROT
588: out dx,ax ;logical function = XOR to invert
589:
590: ; Invert the destination
591:
592: call draw_banks
593:
594: ; Restore the VGA's hardware to the state required for the second pass.
595:
596: mov edx,VGA_BASE + GRAF_ADDR
597: mov ah,byte ptr jALUFunc
598: mov al,GRAF_DATA_ROT
599: out dx,ax ;set the ALU logical function back to
600: ; proper state for the rest of the rop
601:
602: pop eax
603: mov ulDrawingColor,eax ;restore the normal drawing color
604:
605: ; Perform the second pass to finish the rop.
606:
607: jmp do_single_pass
608:
609:
610: ;-----------------------------------------------------------------------;
611: ; Fills all banks in the current fill rectangle. Called once per fill
612: ; rectangle, except for destination-inversion-plus-something-else rops.
613: ;-----------------------------------------------------------------------;
614:
615: align 4
616: draw_banks:
617:
618: ;-----------------------------------------------------------------------;
619: ; Map in the bank containing the top scan to fill, if it's not mapped in
620: ; already.
621: ;-----------------------------------------------------------------------;
622:
623: mov edi,prcl ;point to rectangle to fill
624: mov ecx,ppdev ;point to PDEV
625: mov eax,[edi].yTop ;top scan line of fill
626: mov ulCurrentTopScan,eax ;this will be the fill top in 1st bank
627:
628: cmp eax,[ecx].pdev_rcl1PlanarClip.yTop ;is fill top less than
629: ; current bank?
630: jl short map_init_bank ;yes, map in proper bank
631: cmp eax,[ecx].pdev_rcl1PlanarClip.yBottom ;fill top greater than
632: ; current bank?
633: jl short init_bank_mapped ;no, proper bank already mapped
634: map_init_bank:
635:
636: ; Map in the bank containing the top scan line of the fill.
637:
638: ptrCall <dword ptr [ecx].pdev_pfnPlanarControl>,<ecx,eax,JustifyTop>
639:
640: init_bank_mapped:
641:
642: ;-----------------------------------------------------------------------;
643: ; Main loop for processing fill in each bank.
644: ;-----------------------------------------------------------------------;
645:
646: ; Compute the starting address and scan line count for the initial bank.
647:
648: mov eax,ppdev ;point to PDEV
649: mov ebx,ulBottomScan ;bottom of destination rectangle
650: cmp ebx,[eax].pdev_rcl1PlanarClip.yBottom
651: ;which comes first, the bottom of the
652: ; dest rect or the bottom of the
653: ; current bank?
654: jl short BottomScanSet ;fill bottom comes first, so draw to
655: ; that; this is the last bank in fill
656: mov ebx,[eax].pdev_rcl1PlanarClip.yBottom
657: ;bank bottom comes first; draw to
658: ; bottom of bank
659: BottomScanSet:
660: mov edi,ulCurrentTopScan ;top scan line to fill in current bank
661: sub ebx,edi ;# of scans to fill in bank
662: imul edi,ulScanWidth ;offset of starting scan line
663:
664: ; Note that the start of the bitmap will change each time through the
665: ; bank loop, because the start of the bitmap is varied to map the
666: ; desired scan line to the banking window.
667:
668: add edi,[eax].pdev_pvBitmapStart ;start of scan in bitmap
669: add edi,ulRowOffset ;EDI = start offset of fill in bitmap
670:
671: ; We have computed the starting address and scan count. Time to start drawing
672: ; in the initial bank.
673:
674: jmp pfnStartDrawing
675:
676:
677: ;-----------------------------------------------------------------------;
678: ; Whole byte fills.
679: ;-----------------------------------------------------------------------;
680:
681: ;-----------------------------------------------------------------------;
682: ; Handles replace-type whole byte fills wider than the maximum special
683: ; case width.
684: ;
685: ; The destination is not involved, so a STOS (or equivalent) can be used
686: ; (no read needed before write).
687: ;-----------------------------------------------------------------------;
688:
689: align 4
690: whole_bytes_rep_wide:
691: push ebx ;save scan count
692: push edi ;save starting address
693:
694: mov eax,pfnWholeBytes ;point to entry table for unrolled
695: ; loop for whole byte width
696: SET_UP_UNROLL_VARS ebx,ecx, ebx,[eax], LOOP_UNROLL_SHIFT
697: add edi,ulLeftEdgeAdjust ;point to first whole byte to fill
698: mov esi,ulWholeDwords ;whole dwords width
699: mov edx,ulNextScan ;offset from end of one scan line to
700: ; start of next
701: mov eax,ulDrawingColor ;each byte is color with which to fill
702:
703: call ecx ;draw the wide whole bytes
704:
705: pop edi ;restore screen pointer
706: pop ebx ;restore fill scan count
707: jmp pfnContinueDrawing ;either keep drawing or we're done
708:
709:
710: ;-----------------------------------------------------------------------;
711: ; Handles both replace and non-replace whole byte fills narrow enough to
712: ; special case.
713: ;-----------------------------------------------------------------------;
714:
715: align 4
716: whole_bytes_special:
717: push ebx ;save scan count
718: push edi ;save starting address
719:
720: mov eax,pfnWholeBytes ;point to entry table for unrolled
721: ; loop for whole byte width
722: SET_UP_UNROLL_VARS ebx,edx, ebx,[eax], LOOP_UNROLL_SHIFT
723: add edi,ulLeftEdgeAdjust ;point to first whole byte to fill
724: mov ecx,ulScanWidth ;offset to next scan line
725: mov eax,ulDrawingColor ;each byte is color with which to fill
726:
727: call edx ;draw the whole bytes
728:
729: pop edi ;restore screen pointer
730: pop ebx ;restore fill scan count
731: jmp pfnContinueDrawing ;either keep drawing or we're done
732:
733:
734: ;-----------------------------------------------------------------------;
735: ; Handles non-replace whole byte fills wider than the maximum special case
736: ; width.
737: ;
738: ; The destination is involved, so a MOVSB (or equivalent) must be
739: ; performed in order to do a read before write to give the ALUs something
740: ; to work with.
741: ;-----------------------------------------------------------------------;
742:
743: align 4
744: whole_bytes_non_replace_wide:
745: push ebx ;save scan count
746: push edi ;save starting address
747:
748: add edi,ulLeftEdgeAdjust ;point to first whole byte to fill
749: mov esi,ulWholeBytes ;whole bytes width
750: mov edx,ulNextScan ;offset from end of one scan line to
751: ; start of next
752: mov eax,ulDrawingColor ;each byte is color with which to fill
753:
754: ;-----------------------------------------------------------------------;
755: ; 5-or-wider read before write loop.
756: ;
757: ; Entry:
758: ; AL = color byte with which to fill (AH is scratch)
759: ; ESI = # of whole bytes to fill across each scan line
760: ; EBX = # of scan lines to fill
761: ; EDX = offset from end of one scan line's fill to the start of the next
762: ; EDI = start offset
763: ;
764: ; AH, EBX, ECX, EDI modified. All other registers preserved.
765:
766: ; 5-or-wider read/write.
767:
768: draw_wide_rw_loop:
769: mov ecx,esi
770: @@:
771: mov ah,[edi] ;latch the target address. The data read
772: ; doesn't matter
773: mov [edi],al ;merge the drawing color with the latched
774: ; target address according to the selected ALU
775: ; function, and write the result to display
776: ; memory
777: inc edi ;point to the next byte
778: dec ecx
779: jnz @B
780: add edi,edx
781: dec ebx
782: jnz draw_wide_rw_loop
783:
784: pop edi ;restore screen pointer
785: pop ebx ;restore fill scan count
786: jmp pfnContinueDrawing ;either keep drawing or we're done
787:
788:
789: ;-----------------------------------------------------------------------;
790: ; Process any left/right columns that have to be done.
791: ;
792: ; Currently:
793: ; EBX = height to fill, in scans
794: ; EDI --> first byte of left edge
795: ;-----------------------------------------------------------------------;
796:
797: ;-----------------------------------------------------------------------;
798: ; Handle case where both edges are partial (non-whole) bytes. We don't
799: ; have to read before write because we're using the Map Mask, not the
800: ; Bit Mask.
801: ;-----------------------------------------------------------------------;
802: align 4
803: public do_both_edge_bytes
804: do_both_edge_bytes:
805:
806: ; Set up variables for entering unrolled loop.
807:
808: mov al,byte ptr ulMasks ;this will become the clip mask for the
809: ; left edge
810: mov edx,VGA_BASE + SEQ_DATA ;SEQ_INDEX already points to Map Mask
811: out dx,al ;set Map Mask for left edge
812:
813: mov ecx,ppfnDrawEdgeTable
814: SET_UP_UNROLL_VARS ebx,edx, ebx,[ecx], LOOP_UNROLL_SHIFT
815: mov pfnDraw1WideVector,edx
816:
817: mov ecx,ulScanWidth ;offset from one scan to next
818:
819: mov esi,ulWholeBytes ;ESI = # of whole bytes
820: lea esi,[esi+edi+1] ;--> start for right edge
821: mov eax,ulDrawingColor ;each byte is color with which to fill
822:
823: push ebx ;preserve scan line count
824: call edx ;jump into the unrolled loop to draw
825: pop ebx ;restore scan line count
826:
827: mov edi,esi ;point to first right edge byte
828: mov al,byte ptr ulMasks+1 ;this will become the Map Mask for the
829: ; right edge
830: mov edx,VGA_BASE + SEQ_DATA ;SEQ_INDEX already points to Map Mask
831: out dx,al ;set Map Mask for right edge
832:
833: mov eax,ulDrawingColor ;each byte is color with which to fill
834:
835: push offset edges_done ;return here
836: jmp pfnDraw1WideVector ;jump into the unrolled loop to draw
837:
838: ;-----------------------------------------------------------------------;
839: ; Handle case where only the left edge is partial (non-whole).
840: ;-----------------------------------------------------------------------;
841: align 4
842: do_left_edge_bytes:
843:
844: ; Set up variables for entering unrolled loop.
845:
846: mov ecx,ppfnDrawEdgeTable ;handed to the macro below as [ECX]
847: SET_UP_UNROLL_VARS ebx,esi, ebx,[ecx], LOOP_UNROLL_SHIFT ;leaves loop entry point in ESI (macro defined elsewhere)
848:
849: mov ecx,ulScanWidth ;offset from one scan to next
850: mov al,byte ptr ulMasks ;low byte of ulMasks becomes the Map Mask
851: ; for the left edge
852: mov edx,VGA_BASE + SEQ_DATA ;SEQ_INDEX already points to Map Mask
853: out dx,al ;set Map Mask for left edge
854:
855: mov eax,ulDrawingColor ;each byte is color with which to fill (AL was used for the mask)
856:
857: push offset edges_done ;the loop's RET will land at edges_done
858: jmp esi ;jump into the unrolled loop to draw
859:
860: ;-----------------------------------------------------------------------;
861: ; Handle case where only the right edge is partial (non-whole).
862: ;-----------------------------------------------------------------------;
863: align 4
864: do_right_edge_bytes:
865:
866: ; Set up variables for entering unrolled loop.
867:
868: mov ecx,ppfnDrawEdgeTable ;handed to the macro below as [ECX]
869: SET_UP_UNROLL_VARS ebx,esi, ebx,[ecx], LOOP_UNROLL_SHIFT ;leaves loop entry point in ESI (macro defined elsewhere)
870:
871: mov ecx,ulScanWidth ;offset from one scan to next
872: add edi,ulWholeBytes ;--> start for right edge (remember,
873: ; left edge is whole, so the left edge
874: ; byte is included in the whole byte
875: ; count)
876: mov al,byte ptr ulMasks+1 ;second byte of ulMasks becomes the Map
877: ; Mask for the right edge
878: mov edx,VGA_BASE + SEQ_DATA ;SEQ_INDEX already points to Map Mask
879: out dx,al ;set Map Mask for right edge
880:
881: mov eax,ulDrawingColor ;each byte is color with which to fill (AL was used for the mask)
882:
883: call esi ;call into the unrolled loop to draw; on return, fall into edges_done
884:
885: ;-----------------------------------------------------------------------;
886: ; We have done all partial edges. All three edge cases end up here.
887: ;-----------------------------------------------------------------------;
888:
889: edges_done:
890:
891: mov edx,VGA_BASE + SEQ_DATA ;SEQ_INDEX already points to Map Mask
892: mov al,MM_ALL ;restore the default Map Mask of all
893: out dx,al ; planes enabled
894:
895: ;-----------------------------------------------------------------------;
896: ; See if there are any more banks to process.
897: ;-----------------------------------------------------------------------;
898:
899: check_next_bank:
900:
901: mov edi,ppdev ;EDI->target surface
902: mov eax,[edi].pdev_rcl1PlanarClip.yBottom ;is the fill bottom in
903: cmp ulBottomScan,eax ; the current bank?
904: jle short banks_done ;yes, so we're done
905: ;no, map in the next bank and fill it
906: mov ulCurrentTopScan,eax ;remember where the top of the bank
907: ; we're about to map in is (same as
908: ; bottom of bank we just did)
909:
910: ptrCall <dword ptr [edi].pdev_pfnPlanarControl>,<edi,eax,JustifyTop>
911: ;map in the bank
912:
913: ; Compute the starting address and scan line count in this bank.
914:
915: mov eax,ppdev ;EAX->target surface (reloaded after the bank call)
916: mov ebx,ulBottomScan ;bottom of destination rectangle
917: cmp ebx,[eax].pdev_rcl1PlanarClip.yBottom
918: ;which comes first, the bottom of the
919: ; dest rect or the bottom of the
920: ; current bank?
921: jl short BottomScanSet2 ;fill bottom comes first, so draw to
922: ; that; this is the last bank in fill
923: mov ebx,[eax].pdev_rcl1PlanarClip.yBottom
924: ;bank bottom comes first; draw to
925: ; bottom of bank
926: BottomScanSet2:
927: mov edi,ulCurrentTopScan ;top scan line to fill in current bank
928: sub ebx,edi ;EBX = # of scans to fill in bank
929: imul edi,ulScanWidth ;offset of starting scan line
930:
931: ; Note that the start of the bitmap will change each time through the
932: ; bank loop, because the start of the bitmap is varied to map the
933: ; desired scan line to the banking window.
934:
935: add edi,[eax].pdev_pvBitmapStart ;start of scan in bitmap
936: add edi,ulRowOffset ;EDI = start offset of fill in bitmap
937:
938: ; Draw in the new bank.
939:
940: jmp pfnStartDrawing ;EBX = scans to fill, EDI = first byte to fill
941:
942:
943: ;-----------------------------------------------------------------------;
944: ; Done with all banks in this fill.
945:
946: banks_done:
947: retn
948:
949: endProc vTrgBlt
950:
951:
952: ;-----------------------------------------------------------------------;
953: ; Unrolled loops.
954: ; There are two kinds of unrolled loops: read-before-write (to load the
955: ; latches), and write-only (for replace-type rops).
956: ;-----------------------------------------------------------------------;
957:
958:
959: ;-----------------------------------------------------------------------;
960: ; Unrolled drawing stuff for cases where read before write is required,
961: ; to load the latches.
962: ;-----------------------------------------------------------------------;
963:
964: ; Tables of entry points into unrolled 1-, 2-, 3-, and 4-wide, and 5-or-wider
965: ; read before write loops.
966: ; Only the 1- to 4-wide tables are defined at this point; each label prefix matches the one its loop passes to UNROLL_LOOP.
967: UNROLL_LOOP_ENTRY_TABLE pfnDraw1WideRWEntry,RW1,LOOP_UNROLL_COUNT ;labels in draw_1_wide_rw_loop
968: UNROLL_LOOP_ENTRY_TABLE pfnDraw2WideRWEntry,RW2,LOOP_UNROLL_COUNT ;labels in draw_2_wide_rw_loop
969: UNROLL_LOOP_ENTRY_TABLE pfnDraw3WideRWEntry,RW3,LOOP_UNROLL_COUNT ;labels in draw_3_wide_rw_loop
970: UNROLL_LOOP_ENTRY_TABLE pfnDraw4WideRWEntry,RW4,LOOP_UNROLL_COUNT ;labels in draw_4_wide_rw_loop
971:
972: ;-----------------------------------------------------------------------;
973: ; Unrolled 1-, 2-, 3-, and 4-wide read before write drawing loops.
974: ;
975: ; Entry:
976: ; AL = drawing color
977: ; EBX = unrolled loop count
978: ; ECX = scan line width in bytes
979: ; EDI = start offset
980: ;
981: ; EBX, EDI, and AH modified. All other registers preserved.
982:
983: ;-----------------------------------------------------------------------;
984: ; Macro to draw one read before write byte, then advance to next scan line.
985: ; Emits a label built from ENTRY_LABEL and ENTRY_INDEX. Overwrites AH.
986: DRAW_1_WIDE_RW macro ENTRY_LABEL,ENTRY_INDEX
987: &ENTRY_LABEL&ENTRY_INDEX&:
988: mov ah,[edi] ;latch the target address. The data read
989: ; doesn't matter
990: mov [edi],al ;merge the drawing color with the latched
991: ; target address according to the selected ALU
992: ; function, and write the result to display
993: ; memory
994: add edi,ecx ;point to the next scan line
995: endm ;-----------------------------------;
996:
997: ; 1-wide read/write. EBX = passes left through the unrolled block.
998:
999: align 4
1000: draw_1_wide_rw_loop proc near
1001: UNROLL_LOOP DRAW_1_WIDE_RW,RW1,LOOP_UNROLL_COUNT ;presumably repeats the macro body with RW1-prefixed labels (macro defined elsewhere)
1002: dec ebx ;one pass through the unrolled block done
1003: jnz draw_1_wide_rw_loop ;go around again until EBX reaches zero
1004:
1005: ret ;return to the address the caller left on the stack
1006:
1007: draw_1_wide_rw_loop endp
1008:
1009: ;-----------------------------------------------------------------------;
1010: ; Macro to draw two read before write bytes, then advance to next scan line.
1011: ; Emits a label built from ENTRY_LABEL and ENTRY_INDEX. Overwrites AH.
1012: DRAW_2_WIDE_RW macro ENTRY_LABEL,ENTRY_INDEX
1013: &ENTRY_LABEL&ENTRY_INDEX&:
1014: mov ah,[edi] ;read to load the latches; value in AH is discarded
1015: mov [edi],al ;write drawing color to first byte
1016: mov ah,[edi+1] ;load the latches for the second byte
1017: mov [edi+1],al ;write drawing color to second byte
1018: add edi,ecx ;point to the next scan line
1019: endm ;-----------------------------------;
1020:
1021: ; 2-wide read/write. EBX = passes left through the unrolled block.
1022:
1023: align 4
1024: draw_2_wide_rw_loop proc near
1025: UNROLL_LOOP DRAW_2_WIDE_RW,RW2,LOOP_UNROLL_COUNT ;presumably repeats the macro body with RW2-prefixed labels (macro defined elsewhere)
1026: dec ebx ;one pass through the unrolled block done
1027: jnz draw_2_wide_rw_loop ;go around again until EBX reaches zero
1028:
1029: ret ;return to the address the caller left on the stack
1030:
1031: draw_2_wide_rw_loop endp
1032:
1033: ;-----------------------------------------------------------------------;
1034: ; Macro to draw three read before write bytes, then advance to next scan line.
1035: ; Emits a label built from ENTRY_LABEL and ENTRY_INDEX. Overwrites AH.
1036: DRAW_3_WIDE_RW macro ENTRY_LABEL,ENTRY_INDEX
1037: &ENTRY_LABEL&ENTRY_INDEX&:
1038: mov ah,[edi] ;read to load the latches; value in AH is discarded
1039: mov [edi],al ;write drawing color to first byte
1040: mov ah,[edi+1] ;load the latches for the second byte
1041: mov [edi+1],al ;write drawing color to second byte
1042: mov ah,[edi+2] ;load the latches for the third byte
1043: mov [edi+2],al ;write drawing color to third byte
1044: add edi,ecx ;point to the next scan line
1045: endm ;-----------------------------------;
1046:
1047: ; 3-wide read/write. EBX = passes left through the unrolled block.
1048:
1049: align 4
1050: draw_3_wide_rw_loop proc near
1051: UNROLL_LOOP DRAW_3_WIDE_RW,RW3,LOOP_UNROLL_COUNT ;presumably repeats the macro body with RW3-prefixed labels (macro defined elsewhere)
1052: dec ebx ;one pass through the unrolled block done
1053: jnz draw_3_wide_rw_loop ;go around again until EBX reaches zero
1054:
1055: ret ;return to the address the caller left on the stack
1056:
1057: draw_3_wide_rw_loop endp
1058:
1059: ;-----------------------------------------------------------------------;
1060: ; Macro to draw four read before write bytes, then advance to next scan line.
1061: ; Emits a label built from ENTRY_LABEL and ENTRY_INDEX. Overwrites AH.
1062: DRAW_4_WIDE_RW macro ENTRY_LABEL,ENTRY_INDEX
1063: &ENTRY_LABEL&ENTRY_INDEX&:
1064: mov ah,[edi] ;read to load the latches; value in AH is discarded
1065: mov [edi],al ;write drawing color to first byte
1066: mov ah,[edi+1] ;load the latches for the second byte
1067: mov [edi+1],al ;write drawing color to second byte
1068: mov ah,[edi+2] ;load the latches for the third byte
1069: mov [edi+2],al ;write drawing color to third byte
1070: mov ah,[edi+3] ;load the latches for the fourth byte
1071: mov [edi+3],al ;write drawing color to fourth byte
1072: add edi,ecx ;point to the next scan line
1073: endm ;-----------------------------------;
1074:
1075: ; 4-wide read/write. EBX = passes left through the unrolled block.
1076:
1077: align 4
1078: draw_4_wide_rw_loop proc near
1079: UNROLL_LOOP DRAW_4_WIDE_RW,RW4,LOOP_UNROLL_COUNT ;presumably repeats the macro body with RW4-prefixed labels (macro defined elsewhere)
1080: dec ebx ;one pass through the unrolled block done
1081: jnz draw_4_wide_rw_loop ;go around again until EBX reaches zero
1082:
1083: ret ;return to the address the caller left on the stack
1084:
1085: draw_4_wide_rw_loop endp
1086:
1087: ;-----------------------------------------------------------------------;
1088: ; Unrolled drawing stuff (unrolled to reduce jumps to speed things up),
1089: ; for cases where read before write is NOT required.
1090: ;-----------------------------------------------------------------------;
1091:
1092: ; Tables of entry points into unrolled 1- through 8-wide write-only loops.
1093: ; Note that there may be separate entry tables for various alignments of a
1094: ; specific width, in cases where performance can be improved by using different
1095: ; code for different alignments.
1096: ; Each label prefix below matches the prefix its loop passes to UNROLL_LOOP.
1097: UNROLL_LOOP_ENTRY_TABLE pfnDraw1WideWEntry,W1,LOOP_UNROLL_COUNT
1098: UNROLL_LOOP_ENTRY_TABLE pfnDraw2WideWEntry,W2,LOOP_UNROLL_COUNT
1099: UNROLL_LOOP_ENTRY_TABLE pfnDraw3WideWEvenEntry,W3_EVEN,LOOP_UNROLL_COUNT
1100: UNROLL_LOOP_ENTRY_TABLE pfnDraw3WideWOddEntry,W3_ODD,LOOP_UNROLL_COUNT
1101: UNROLL_LOOP_ENTRY_TABLE pfnDraw4WideWEntry,W4,LOOP_UNROLL_COUNT
1102: UNROLL_LOOP_ENTRY_TABLE pfnDraw5WideWEvenEntry,W5_EVEN,LOOP_UNROLL_COUNT
1103: UNROLL_LOOP_ENTRY_TABLE pfnDraw5WideWOddEntry,W5_ODD,LOOP_UNROLL_COUNT
1104: UNROLL_LOOP_ENTRY_TABLE pfnDraw6WideWMod3_0Entry,W6_MOD3_0,LOOP_UNROLL_COUNT ;MOD3_n: presumably start address AND 3 == n
1105: UNROLL_LOOP_ENTRY_TABLE pfnDraw6WideWMod3_1Entry,W6_MOD3_1,LOOP_UNROLL_COUNT
1106: UNROLL_LOOP_ENTRY_TABLE pfnDraw6WideWMod3_2Entry,W6_MOD3_2,LOOP_UNROLL_COUNT
1107: UNROLL_LOOP_ENTRY_TABLE pfnDraw7WideWMod3_0Entry,W7_MOD3_0,LOOP_UNROLL_COUNT
1108: UNROLL_LOOP_ENTRY_TABLE pfnDraw7WideWMod3_1Entry,W7_MOD3_1,LOOP_UNROLL_COUNT
1109: UNROLL_LOOP_ENTRY_TABLE pfnDraw7WideWMod3_2Entry,W7_MOD3_2,LOOP_UNROLL_COUNT
1110: UNROLL_LOOP_ENTRY_TABLE pfnDraw7WideWMod3_3Entry,W7_MOD3_3,LOOP_UNROLL_COUNT
1111: UNROLL_LOOP_ENTRY_TABLE pfnDraw8WideWMod3_0Entry,W8_MOD3_0,LOOP_UNROLL_COUNT
1112: UNROLL_LOOP_ENTRY_TABLE pfnDraw8WideWMod3_1Entry,W8_MOD3_1,LOOP_UNROLL_COUNT
1113: UNROLL_LOOP_ENTRY_TABLE pfnDraw8WideWMod3_2Entry,W8_MOD3_2,LOOP_UNROLL_COUNT
1114: UNROLL_LOOP_ENTRY_TABLE pfnDraw8WideWMod3_3Entry,W8_MOD3_3,LOOP_UNROLL_COUNT
1115:
1116:
1117: ;-----------------------------------------------------------------------;
1118: ; Unrolled 1- through 8-wide write-only edge-drawing loops.
1119: ;
1120: ; Entry:
1121: ; EAX = fill color, replicated four times
1122: ; EBX = unrolled loop count
1123: ; ECX = scan line width in bytes
1124: ; EDI = start offset
1125: ;
1126: ; EBX, EDI modified. All other registers preserved.
1127:
1128: ;-----------------------------------------------------------------------;
1129: ; Macro to draw one write-only byte, then advance to next scan line.
1130: ; Emits a label built from ENTRY_LABEL and ENTRY_INDEX.
1131: DRAW_1_WIDE_W macro ENTRY_LABEL,ENTRY_INDEX
1132: &ENTRY_LABEL&ENTRY_INDEX&:
1133: mov [edi],al ;store one byte of the fill color
1134: add edi,ecx ;point to the next scan line
1135: endm ;-----------------------------------;
1136:
1137: ; 1-wide write-only. EBX = passes left through the unrolled block.
1138:
1139: align 4
1140: draw_1_wide_w_loop proc near
1141: UNROLL_LOOP DRAW_1_WIDE_W,W1,LOOP_UNROLL_COUNT ;presumably repeats the macro body with W1-prefixed labels (macro defined elsewhere)
1142: dec ebx ;one pass through the unrolled block done
1143: jnz draw_1_wide_w_loop ;go around again until EBX reaches zero
1144:
1145: ret ;return to the address the caller left on the stack
1146:
1147: draw_1_wide_w_loop endp
1148:
1149: ;-----------------------------------------------------------------------;
1150: ; Macro to draw two write-only bytes, then advance to next scan line.
1151: ; Emits a label built from ENTRY_LABEL and ENTRY_INDEX.
1152: DRAW_2_WIDE_W macro ENTRY_LABEL,ENTRY_INDEX
1153: &ENTRY_LABEL&ENTRY_INDEX&:
1154: mov [edi],ax ;store both bytes with one word write
1155: add edi,ecx ;point to the next scan line
1156: endm ;-----------------------------------;
1157:
1158: ; 2-wide write-only. EBX = passes left through the unrolled block.
1159:
1160: align 4
1161: draw_2_wide_w_loop proc near
1162: UNROLL_LOOP DRAW_2_WIDE_W,W2,LOOP_UNROLL_COUNT ;presumably repeats the macro body with W2-prefixed labels (macro defined elsewhere)
1163: dec ebx ;one pass through the unrolled block done
1164: jnz draw_2_wide_w_loop ;go around again until EBX reaches zero
1165:
1166: ret ;return to the address the caller left on the stack
1167:
1168: draw_2_wide_w_loop endp
1169:
1170: ;-----------------------------------------------------------------------;
1171: ; Macro to draw three write-only bytes, then advance to next scan line.
1172: ; Optimized for even start address (the word store comes first).
1173:
1174: DRAW_3_WIDE_W_EVEN macro ENTRY_LABEL,ENTRY_INDEX
1175: &ENTRY_LABEL&ENTRY_INDEX&:
1176: mov [edi],ax ;bytes 0-1 as a word
1177: mov [edi+2],al ;byte 2
1178: add edi,ecx ;point to the next scan line
1179: endm ;-----------------------------------;
1180:
1181: ; 3-wide write-only, starting at an even address.
1182: ; EBX = passes left through the unrolled block.
1183: align 4
1184: draw_3_wide_w_even_loop proc near
1185: UNROLL_LOOP DRAW_3_WIDE_W_EVEN,W3_EVEN,LOOP_UNROLL_COUNT ;presumably repeats the macro body with W3_EVEN-prefixed labels
1186: dec ebx ;one pass through the unrolled block done
1187: jnz draw_3_wide_w_even_loop ;go around again until EBX reaches zero
1188:
1189: ret ;return to the address the caller left on the stack
1190:
1191: draw_3_wide_w_even_loop endp
1192:
1193: ;-----------------------------------------------------------------------;
1194: ; Macro to draw three write-only bytes, then advance to next scan line.
1195: ; Optimized for odd start address (byte first, so the word store is even).
1196:
1197: DRAW_3_WIDE_W_ODD macro ENTRY_LABEL,ENTRY_INDEX
1198: &ENTRY_LABEL&ENTRY_INDEX&:
1199: mov [edi],al ;byte 0
1200: mov [edi+1],ax ;bytes 1-2 as a word
1201: add edi,ecx ;point to the next scan line
1202: endm ;-----------------------------------;
1203:
1204: ; 3-wide write-only, starting at an odd address.
1205: ; EBX = passes left through the unrolled block.
1206: align 4
1207: draw_3_wide_w_odd_loop proc near
1208: UNROLL_LOOP DRAW_3_WIDE_W_ODD,W3_ODD,LOOP_UNROLL_COUNT ;presumably repeats the macro body with W3_ODD-prefixed labels
1209: dec ebx ;one pass through the unrolled block done
1210: jnz draw_3_wide_w_odd_loop ;go around again until EBX reaches zero
1211:
1212: ret ;return to the address the caller left on the stack
1213:
1214: draw_3_wide_w_odd_loop endp
1215:
1216:
1217: ;-----------------------------------------------------------------------;
1218: ; Macro to draw four write-only bytes, then advance to next scan line.
1219: ; Emits a label built from ENTRY_LABEL and ENTRY_INDEX.
1220: DRAW_4_WIDE_W macro ENTRY_LABEL,ENTRY_INDEX
1221: &ENTRY_LABEL&ENTRY_INDEX&:
1222: mov [edi],eax ;store all four bytes with one dword write
1223: add edi,ecx ;point to the next scan line
1224: endm ;-----------------------------------;
1225:
1226: ; 4-wide write-only. EBX = passes left through the unrolled block.
1227:
1228: align 4
1229: draw_4_wide_w_loop proc near
1230: UNROLL_LOOP DRAW_4_WIDE_W,W4,LOOP_UNROLL_COUNT ;presumably repeats the macro body with W4-prefixed labels (macro defined elsewhere)
1231: dec ebx ;one pass through the unrolled block done
1232: jnz draw_4_wide_w_loop ;go around again until EBX reaches zero
1233:
1234: ret ;return to the address the caller left on the stack
1235:
1236: draw_4_wide_w_loop endp
1237:
1238:
1239: ;-----------------------------------------------------------------------;
1240: ; Macro to draw five write-only bytes, then advance to next scan line.
1241: ; Optimized for even start address (the dword store comes first).
1242:
1243: DRAW_5_WIDE_W_EVEN macro ENTRY_LABEL,ENTRY_INDEX
1244: &ENTRY_LABEL&ENTRY_INDEX&:
1245: mov [edi],eax ;bytes 0-3 as a dword
1246: mov [edi+4],al ;byte 4
1247: add edi,ecx ;point to the next scan line
1248: endm ;-----------------------------------;
1249:
1250: ; 5-wide write-only, starting at an even address.
1251: ; EBX = passes left through the unrolled block.
1252: align 4
1253: draw_5_wide_w_even_loop proc near
1254: UNROLL_LOOP DRAW_5_WIDE_W_EVEN,W5_EVEN,LOOP_UNROLL_COUNT ;presumably repeats the macro body with W5_EVEN-prefixed labels
1255: dec ebx ;one pass through the unrolled block done
1256: jnz draw_5_wide_w_even_loop ;go around again until EBX reaches zero
1257:
1258: ret ;return to the address the caller left on the stack
1259:
1260: draw_5_wide_w_even_loop endp
1261:
1262:
1263: ;-----------------------------------------------------------------------;
1264: ; Macro to draw five write-only bytes, then advance to next scan line.
1265: ; Optimized for odd start address (byte first, then the dword store).
1266:
1267: DRAW_5_WIDE_W_ODD macro ENTRY_LABEL,ENTRY_INDEX
1268: &ENTRY_LABEL&ENTRY_INDEX&:
1269: mov [edi],al ;byte 0
1270: mov [edi+1],eax ;bytes 1-4 as a dword
1271: add edi,ecx ;point to the next scan line
1272: endm ;-----------------------------------;
1273:
1274: ; 5-wide write-only, starting at an odd address.
1275: ; EBX = passes left through the unrolled block.
1276: align 4
1277: draw_5_wide_w_odd_loop proc near
1278: UNROLL_LOOP DRAW_5_WIDE_W_ODD,W5_ODD,LOOP_UNROLL_COUNT ;presumably repeats the macro body with W5_ODD-prefixed labels
1279: dec ebx ;one pass through the unrolled block done
1280: jnz draw_5_wide_w_odd_loop ;go around again until EBX reaches zero
1281:
1282: ret ;return to the address the caller left on the stack
1283:
1284: draw_5_wide_w_odd_loop endp
1285:
1286:
1287: ;-----------------------------------------------------------------------;
1288: ; Macro to draw six write-only bytes, then advance to next scan line.
1289: ; Optimized for start address MOD 4 == 0 (MOD3 in the name presumably means address AND 3).
1290:
1291: DRAW_6_WIDE_W_MOD3_0 macro ENTRY_LABEL,ENTRY_INDEX
1292: &ENTRY_LABEL&ENTRY_INDEX&:
1293: mov [edi],eax ;bytes 0-3 as a dword
1294: mov [edi+4],ax ;bytes 4-5 as a word
1295: add edi,ecx ;point to the next scan line
1296: endm ;-----------------------------------;
1297:
1298: ; 6-wide write-only, starting at MOD 4 == 0.
1299: ; EBX = passes left through the unrolled block.
1300: align 4
1301: draw_6_wide_w_mod3_0_loop proc near
1302: UNROLL_LOOP DRAW_6_WIDE_W_MOD3_0,W6_MOD3_0,LOOP_UNROLL_COUNT ;presumably repeats the macro body with W6_MOD3_0-prefixed labels
1303: dec ebx ;one pass through the unrolled block done
1304: jnz draw_6_wide_w_mod3_0_loop ;go around again until EBX reaches zero
1305:
1306: ret ;return to the address the caller left on the stack
1307:
1308: draw_6_wide_w_mod3_0_loop endp
1309:
1310:
1311: ;-----------------------------------------------------------------------;
1312: ; Macro to draw six write-only bytes, then advance to next scan line.
1313: ; Optimized for start address MOD 4 == 1 or 3 (MOD3 in the name presumably means address AND 3).
1314:
1315: DRAW_6_WIDE_W_MOD3_1 macro ENTRY_LABEL,ENTRY_INDEX
1316: &ENTRY_LABEL&ENTRY_INDEX&:
1317: mov [edi],al ;byte 0
1318: mov [edi+1],eax ;bytes 1-4 as a dword (dword-aligned only in the == 3 case)
1319: mov [edi+5],al ;byte 5
1320: add edi,ecx ;point to the next scan line
1321: endm ;-----------------------------------;
1322:
1323: ; 6-wide write-only, starting at MOD 4 == 1 or 3.
1324: ; EBX = passes left through the unrolled block.
1325: align 4
1326: draw_6_wide_w_mod3_1_loop proc near
1327: UNROLL_LOOP DRAW_6_WIDE_W_MOD3_1,W6_MOD3_1,LOOP_UNROLL_COUNT ;presumably repeats the macro body with W6_MOD3_1-prefixed labels
1328: dec ebx ;one pass through the unrolled block done
1329: jnz draw_6_wide_w_mod3_1_loop ;go around again until EBX reaches zero
1330:
1331: ret ;return to the address the caller left on the stack
1332:
1333: draw_6_wide_w_mod3_1_loop endp
1334:
1335:
1336: ;-----------------------------------------------------------------------;
1337: ; Macro to draw six write-only bytes, then advance to next scan line.
1338: ; Optimized for start address MOD 4 == 2 (MOD3 in the name presumably means address AND 3).
1339:
1340: DRAW_6_WIDE_W_MOD3_2 macro ENTRY_LABEL,ENTRY_INDEX
1341: &ENTRY_LABEL&ENTRY_INDEX&:
1342: mov [edi],ax ;bytes 0-1 as a word
1343: mov [edi+2],eax ;bytes 2-5 as a dword
1344: add edi,ecx ;point to the next scan line
1345: endm ;-----------------------------------;
1346:
1347: ; 6-wide write-only, starting at MOD 4 == 2.
1348: ; EBX = passes left through the unrolled block.
1349: align 4
1350: draw_6_wide_w_mod3_2_loop proc near
1351: UNROLL_LOOP DRAW_6_WIDE_W_MOD3_2,W6_MOD3_2,LOOP_UNROLL_COUNT ;presumably repeats the macro body with W6_MOD3_2-prefixed labels
1352: dec ebx ;one pass through the unrolled block done
1353: jnz draw_6_wide_w_mod3_2_loop ;go around again until EBX reaches zero
1354:
1355: ret ;return to the address the caller left on the stack
1356:
1357: draw_6_wide_w_mod3_2_loop endp
1358:
1359:
1360: ;-----------------------------------------------------------------------;
1361: ; Macro to draw seven write-only bytes, then advance to next scan line.
1362: ; Optimized for start address MOD 4 == 0 (MOD3 in the name presumably means address AND 3).
1363:
1364: DRAW_7_WIDE_W_MOD3_0 macro ENTRY_LABEL,ENTRY_INDEX
1365: &ENTRY_LABEL&ENTRY_INDEX&:
1366: mov [edi],eax ;bytes 0-3 as a dword
1367: mov [edi+4],ax ;bytes 4-5 as a word
1368: mov [edi+6],al ;byte 6
1369: add edi,ecx ;point to the next scan line
1370: endm ;-----------------------------------;
1371:
1372: ; 7-wide write-only, starting at MOD 4 == 0.
1373: ; EBX = passes left through the unrolled block.
1374: align 4
1375: draw_7_wide_w_mod3_0_loop proc near
1376: UNROLL_LOOP DRAW_7_WIDE_W_MOD3_0,W7_MOD3_0,LOOP_UNROLL_COUNT ;presumably repeats the macro body with W7_MOD3_0-prefixed labels
1377: dec ebx ;one pass through the unrolled block done
1378: jnz draw_7_wide_w_mod3_0_loop ;go around again until EBX reaches zero
1379:
1380: ret ;return to the address the caller left on the stack
1381:
1382: draw_7_wide_w_mod3_0_loop endp
1383:
1384:
1385: ;-----------------------------------------------------------------------;
1386: ; Macro to draw seven write-only bytes, then advance to next scan line.
1387: ; Optimized for start address MOD 4 == 1 (MOD3 in the name presumably means address AND 3).
1388:
1389: DRAW_7_WIDE_W_MOD3_1 macro ENTRY_LABEL,ENTRY_INDEX
1390: &ENTRY_LABEL&ENTRY_INDEX&:
1391: mov [edi],al ;byte 0
1392: mov [edi+1],ax ;bytes 1-2 as a word
1393: mov [edi+3],eax ;bytes 3-6 as a dword
1394: add edi,ecx ;point to the next scan line
1395: endm ;-----------------------------------;
1396:
1397: ; 7-wide write-only, starting at MOD 4 == 1.
1398: ; EBX = passes left through the unrolled block.
1399: align 4
1400: draw_7_wide_w_mod3_1_loop proc near
1401: UNROLL_LOOP DRAW_7_WIDE_W_MOD3_1,W7_MOD3_1,LOOP_UNROLL_COUNT ;presumably repeats the macro body with W7_MOD3_1-prefixed labels
1402: dec ebx ;one pass through the unrolled block done
1403: jnz draw_7_wide_w_mod3_1_loop ;go around again until EBX reaches zero
1404:
1405: ret ;return to the address the caller left on the stack
1406:
1407: draw_7_wide_w_mod3_1_loop endp
1408:
1409:
1410: ;-----------------------------------------------------------------------;
1411: ; Macro to draw seven write-only bytes, then advance to next scan line.
1412: ; Optimized for start address MOD 4 == 2 (MOD3 in the name presumably means address AND 3).
1413:
1414: DRAW_7_WIDE_W_MOD3_2 macro ENTRY_LABEL,ENTRY_INDEX
1415: &ENTRY_LABEL&ENTRY_INDEX&:
1416: mov [edi],ax ;bytes 0-1 as a word
1417: mov [edi+2],eax ;bytes 2-5 as a dword
1418: mov [edi+6],al ;byte 6
1419: add edi,ecx ;point to the next scan line
1420: endm ;-----------------------------------;
1421:
1422: ; 7-wide write-only, starting at MOD 4 == 2.
1423: ; EBX = passes left through the unrolled block.
1424: align 4
1425: draw_7_wide_w_mod3_2_loop proc near
1426: UNROLL_LOOP DRAW_7_WIDE_W_MOD3_2,W7_MOD3_2,LOOP_UNROLL_COUNT ;presumably repeats the macro body with W7_MOD3_2-prefixed labels
1427: dec ebx ;one pass through the unrolled block done
1428: jnz draw_7_wide_w_mod3_2_loop ;go around again until EBX reaches zero
1429:
1430: ret ;return to the address the caller left on the stack
1431:
1432: draw_7_wide_w_mod3_2_loop endp
1433:
1434:
1435: ;-----------------------------------------------------------------------;
1436: ; Macro to draw seven write-only bytes, then advance to next scan line.
1437: ; Optimized for start address MOD 4 == 3 (MOD3 in the name presumably means address AND 3).
1438:
1439: DRAW_7_WIDE_W_MOD3_3 macro ENTRY_LABEL,ENTRY_INDEX
1440: &ENTRY_LABEL&ENTRY_INDEX&:
1441: mov [edi],al ;byte 0
1442: mov [edi+1],eax ;bytes 1-4 as a dword
1443: mov [edi+5],ax ;bytes 5-6 as a word
1444: add edi,ecx ;point to the next scan line
1445: endm ;-----------------------------------;
1446:
1447: ; 7-wide write-only, starting at MOD 4 == 3.
1448: ; EBX = passes left through the unrolled block.
1449: align 4
1450: draw_7_wide_w_mod3_3_loop proc near
1451: UNROLL_LOOP DRAW_7_WIDE_W_MOD3_3,W7_MOD3_3,LOOP_UNROLL_COUNT ;presumably repeats the macro body with W7_MOD3_3-prefixed labels
1452: dec ebx ;one pass through the unrolled block done
1453: jnz draw_7_wide_w_mod3_3_loop ;go around again until EBX reaches zero
1454:
1455: ret ;return to the address the caller left on the stack
1456:
1457: draw_7_wide_w_mod3_3_loop endp
1458:
1459:
1460: ;-----------------------------------------------------------------------;
1461: ; Macro to draw eight write-only bytes, then advance to next scan line.
1462: ; Optimized for start address MOD 4 == 0 (MOD3 in the name presumably means address AND 3).
1463:
1464: DRAW_8_WIDE_W_MOD3_0 macro ENTRY_LABEL,ENTRY_INDEX
1465: &ENTRY_LABEL&ENTRY_INDEX&:
1466: mov [edi],eax ;bytes 0-3 as a dword
1467: mov [edi+4],eax ;bytes 4-7 as a dword
1468: add edi,ecx ;point to the next scan line
1469: endm ;-----------------------------------;
1470:
1471: ; 8-wide write-only, starting at MOD 4 == 0.
1472: ; EBX = passes left through the unrolled block.
1473: align 4
1474: draw_8_wide_w_mod3_0_loop proc near
1475: UNROLL_LOOP DRAW_8_WIDE_W_MOD3_0,W8_MOD3_0,LOOP_UNROLL_COUNT ;presumably repeats the macro body with W8_MOD3_0-prefixed labels
1476: dec ebx ;one pass through the unrolled block done
1477: jnz draw_8_wide_w_mod3_0_loop ;go around again until EBX reaches zero
1478:
1479: ret ;return to the address the caller left on the stack
1480:
1481: draw_8_wide_w_mod3_0_loop endp
1482:
1483:
1484: ;-----------------------------------------------------------------------;
1485: ; Macro to draw eight write-only bytes, then advance to next scan line.
1486: ; Optimized for start address MOD 4 == 1 (MOD3 in the name presumably means address AND 3).
1487:
1488: DRAW_8_WIDE_W_MOD3_1 macro ENTRY_LABEL,ENTRY_INDEX
1489: &ENTRY_LABEL&ENTRY_INDEX&:
1490: mov [edi],al ;byte 0
1491: mov [edi+1],ax ;bytes 1-2 as a word
1492: mov [edi+3],eax ;bytes 3-6 as a dword
1493: mov [edi+7],al ;byte 7
1494: add edi,ecx ;point to the next scan line
1495: endm ;-----------------------------------;
1496:
1497: ; 8-wide write-only, starting at MOD 4 == 1.
1498: ; EBX = passes left through the unrolled block.
1499: align 4
1500: draw_8_wide_w_mod3_1_loop proc near
1501: UNROLL_LOOP DRAW_8_WIDE_W_MOD3_1,W8_MOD3_1,LOOP_UNROLL_COUNT ;presumably repeats the macro body with W8_MOD3_1-prefixed labels
1502: dec ebx ;one pass through the unrolled block done
1503: jnz draw_8_wide_w_mod3_1_loop ;go around again until EBX reaches zero
1504:
1505: ret ;return to the address the caller left on the stack
1506:
1507: draw_8_wide_w_mod3_1_loop endp
1508:
1509:
1510: ;-----------------------------------------------------------------------;
1511: ; Macro to draw eight write-only bytes, then advance to next scan line.
1512: ; Optimized for start address MOD 4 == 2 (MOD3 in the name presumably means address AND 3).
1513:
1514: DRAW_8_WIDE_W_MOD3_2 macro ENTRY_LABEL,ENTRY_INDEX
1515: &ENTRY_LABEL&ENTRY_INDEX&:
1516: mov [edi],ax ;bytes 0-1 as a word
1517: mov [edi+2],eax ;bytes 2-5 as a dword
1518: mov [edi+6],ax ;bytes 6-7 as a word
1519: add edi,ecx ;point to the next scan line
1520: endm ;-----------------------------------;
1521:
1522: ; 8-wide write-only, starting at MOD 4 == 2.
1523: ; EBX = passes left through the unrolled block.
1524: align 4
1525: draw_8_wide_w_mod3_2_loop proc near
1526: UNROLL_LOOP DRAW_8_WIDE_W_MOD3_2,W8_MOD3_2,LOOP_UNROLL_COUNT ;presumably repeats the macro body with W8_MOD3_2-prefixed labels
1527: dec ebx ;one pass through the unrolled block done
1528: jnz draw_8_wide_w_mod3_2_loop ;go around again until EBX reaches zero
1529:
1530: ret ;return to the address the caller left on the stack
1531:
1532: draw_8_wide_w_mod3_2_loop endp
1533:
1534:
1535: ;-----------------------------------------------------------------------;
1536: ; Macro to draw eight write-only bytes, then advance to next scan line.
1537: ; Optimized for start address MOD 4 == 3 (MOD3 in the name presumably means address AND 3).
1538:
1539: DRAW_8_WIDE_W_MOD3_3 macro ENTRY_LABEL,ENTRY_INDEX
1540: &ENTRY_LABEL&ENTRY_INDEX&:
1541: mov [edi],al ;byte 0
1542: mov [edi+1],eax ;bytes 1-4 as a dword
1543: mov [edi+5],ax ;bytes 5-6 as a word
1544: mov [edi+7],al ;byte 7
1545: add edi,ecx ;point to the next scan line
1546: endm ;-----------------------------------;
1547:
1548: ; 8-wide write-only, starting at MOD 4 == 3.
1549: ; EBX = passes left through the unrolled block.
1550: align 4
1551: draw_8_wide_w_mod3_3_loop proc near
1552: UNROLL_LOOP DRAW_8_WIDE_W_MOD3_3,W8_MOD3_3,LOOP_UNROLL_COUNT ;presumably repeats the macro body with W8_MOD3_3-prefixed labels
1553: dec ebx ;one pass through the unrolled block done
1554: jnz draw_8_wide_w_mod3_3_loop ;go around again until EBX reaches zero
1555:
1556: ret ;return to the address the caller left on the stack
1557:
1558: draw_8_wide_w_mod3_3_loop endp
1559:
1560: ;-----------------------------------------------------------------------;
1561: ; Unrolled loop stuff for wide replace-type rops (arbitrary width).
1562: ;-----------------------------------------------------------------------;
1563: ; In W_xy, x = # of leading bytes and y = # of trailing bytes (per the DRAW_WIDE_W_0y / _1y macro headers; presumably the same for 2y and 3y).
1564: ; Tables of entry points into unrolled wide write-only loops.
1565: UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW00Entry,W_00,LOOP_UNROLL_COUNT
1566: UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW01Entry,W_01,LOOP_UNROLL_COUNT
1567: UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW02Entry,W_02,LOOP_UNROLL_COUNT
1568: UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW03Entry,W_03,LOOP_UNROLL_COUNT
1569: UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW10Entry,W_10,LOOP_UNROLL_COUNT
1570: UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW11Entry,W_11,LOOP_UNROLL_COUNT
1571: UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW12Entry,W_12,LOOP_UNROLL_COUNT
1572: UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW13Entry,W_13,LOOP_UNROLL_COUNT
1573: UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW20Entry,W_20,LOOP_UNROLL_COUNT
1574: UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW21Entry,W_21,LOOP_UNROLL_COUNT
1575: UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW22Entry,W_22,LOOP_UNROLL_COUNT
1576: UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW23Entry,W_23,LOOP_UNROLL_COUNT
1577: UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW30Entry,W_30,LOOP_UNROLL_COUNT
1578: UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW31Entry,W_31,LOOP_UNROLL_COUNT
1579: UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW32Entry,W_32,LOOP_UNROLL_COUNT
1580: UNROLL_LOOP_ENTRY_TABLE pfnDrawWideW33Entry,W_33,LOOP_UNROLL_COUNT
1581:
1582: ;-----------------------------------------------------------------------;
1583: ; Macro to draw n write-only bytes, 0 leading bytes, 0 trailing bytes,
1584: ; then advance to next scan line.
1585: ; Overwrites ECX. Relies on STOSD advancing EDI (direction flag clear).
1586: DRAW_WIDE_W_00 macro ENTRY_LABEL,ENTRY_INDEX
1587: &ENTRY_LABEL&ENTRY_INDEX&:
1588: mov ecx,esi ;ECX = # of whole dwords (REP count)
1589: rep stosd ;fill all whole bytes as dwords
1590: add edi,edx ;point to the next scan line
1591: endm ;-----------------------------------;
1592:
1593: ; N-wide write-only, 0 leading bytes, 0 trailing bytes.
1594: ; EAX = dword stored by REP STOSD (noted here as 0ffffh; TODO confirm against caller)
1595: ; EBX = count of scans to fill
1596: ; EDX = offset from end of one scan's fill to start of next
1597: ; ESI = # of dwords to fill
1598: ; EDI = target address to fill
1599: ; ECX is overwritten on every scan.
1600: align 4
1601: draw_wide_w_00_loop proc near
1602: UNROLL_LOOP DRAW_WIDE_W_00,W_00,LOOP_UNROLL_COUNT ;presumably repeats the macro body with W_00-prefixed labels (macro defined elsewhere)
1603: dec ebx ;one pass through the unrolled block done
1604: jnz draw_wide_w_00_loop ;go around again until EBX reaches zero
1605:
1606: ret ;return to the address the caller left on the stack
1607:
1608: draw_wide_w_00_loop endp
1609:
1610:
1611: ;-----------------------------------------------------------------------;
1612: ; Macro to draw n write-only bytes, 0 leading bytes, 1 trailing byte,
1613: ; then advance to next scan line.
1614: ; Overwrites ECX. Relies on STOSD advancing EDI (direction flag clear).
1615: DRAW_WIDE_W_01 macro ENTRY_LABEL,ENTRY_INDEX
1616: &ENTRY_LABEL&ENTRY_INDEX&:
1617: mov ecx,esi ;ECX = # of whole dwords (REP count)
1618: rep stosd ;fill whole bytes as dwords
1619: mov [edi],al ;fill the trailing byte
1620: inc edi ;step past the trailing byte
1621: add edi,edx ;point to the next scan line
1622: endm ;-----------------------------------;
1623:
1624: ; N-wide write-only, 0 leading bytes, 1 trailing byte.
1625: ; EAX = fill data: STOSD stores EAX, the trailing byte uses AL
1626: ; EBX = count of scans to fill
1627: ; EDX = offset from end of one scan's fill to start of next
1628: ; ESI = # of dwords to fill
1629: ; EDI = target address to fill
1630: ; ECX is overwritten on every scan.
1631: align 4
1632: draw_wide_w_01_loop proc near
1633: UNROLL_LOOP DRAW_WIDE_W_01,W_01,LOOP_UNROLL_COUNT ;presumably repeats the macro body with W_01-prefixed labels (macro defined elsewhere)
1634: dec ebx ;one pass through the unrolled block done
1635: jnz draw_wide_w_01_loop ;go around again until EBX reaches zero
1636:
1637: ret ;return to the address the caller left on the stack
1638:
1639: draw_wide_w_01_loop endp
1640:
1641:
1642: ;-----------------------------------------------------------------------;
1643: ; Macro to draw n write-only bytes, 0 leading bytes, 2 trailing bytes,
1644: ; then advance to next scan line.
1645: ; Overwrites ECX. Relies on STOSD advancing EDI (direction flag clear).
1646: DRAW_WIDE_W_02 macro ENTRY_LABEL,ENTRY_INDEX
1647: &ENTRY_LABEL&ENTRY_INDEX&:
1648: mov ecx,esi ;ECX = # of whole dwords (REP count)
1649: rep stosd ;fill whole bytes as dwords
1650: mov [edi],ax ;fill the trailing word
1651: add edi,2 ;step past the trailing word
1652: add edi,edx ;point to the next scan line
1653: endm ;-----------------------------------;
1654:
1655: ; N-wide write-only, 0 leading bytes, 2 trailing bytes.
1656: ; EAX = fill data: STOSD stores EAX, the trailing word uses AX
1657: ; EBX = count of scans to fill
1658: ; EDX = offset from end of one scan's fill to start of next
1659: ; ESI = # of dwords to fill
1660: ; EDI = target address to fill
1661: ; ECX is overwritten on every scan.
1662: align 4
1663: draw_wide_w_02_loop proc near
1664: UNROLL_LOOP DRAW_WIDE_W_02,W_02,LOOP_UNROLL_COUNT ;presumably repeats the macro body with W_02-prefixed labels (macro defined elsewhere)
1665: dec ebx ;one pass through the unrolled block done
1666: jnz draw_wide_w_02_loop ;go around again until EBX reaches zero
1667:
1668: ret ;return to the address the caller left on the stack
1669:
1670: draw_wide_w_02_loop endp
1671:
1672:
1673: ;-----------------------------------------------------------------------;
1674: ; Macro to draw n write-only bytes, 0 leading bytes, 3 trailing bytes,
1675: ; then advance to next scan line.
1676: ; Overwrites ECX. Relies on STOSD advancing EDI (direction flag clear).
1677: DRAW_WIDE_W_03 macro ENTRY_LABEL,ENTRY_INDEX
1678: &ENTRY_LABEL&ENTRY_INDEX&:
1679: mov ecx,esi ;ECX = # of whole dwords (REP count)
1680: rep stosd ;fill whole bytes as dwords
1681: mov [edi],ax ;fill the first two trailing bytes as a word
1682: mov [edi+2],al ;fill the last trailing byte
1683: add edi,3 ;step past the three trailing bytes
1684: add edi,edx ;point to the next scan line
1685: endm ;-----------------------------------;
1686:
1687: ; N-wide write-only, 0 leading bytes, 3 trailing bytes.
1688: ; EAX = fill data: STOSD stores EAX, the trailing bytes use AX and AL
1689: ; EBX = count of scans to fill
1690: ; EDX = offset from end of one scan's fill to start of next
1691: ; ESI = # of dwords to fill
1692: ; EDI = target address to fill
1693: ; ECX is overwritten on every scan.
1694: align 4
1695: draw_wide_w_03_loop proc near
1696: UNROLL_LOOP DRAW_WIDE_W_03,W_03,LOOP_UNROLL_COUNT ;presumably repeats the macro body with W_03-prefixed labels (macro defined elsewhere)
1697: dec ebx ;one pass through the unrolled block done
1698: jnz draw_wide_w_03_loop ;go around again until EBX reaches zero
1699:
1700: ret ;return to the address the caller left on the stack
1701:
1702: draw_wide_w_03_loop endp
1703:
1704:
1705: ;-----------------------------------------------------------------------;
1706: ; Macro to draw n write-only bytes, 1 leading byte, 0 trailing bytes,
1707: ; then advance to next scan line.
1708: ; Overwrites ECX. Relies on STOSD advancing EDI (direction flag clear).
1709: DRAW_WIDE_W_10 macro ENTRY_LABEL,ENTRY_INDEX
1710: &ENTRY_LABEL&ENTRY_INDEX&:
1711: mov [edi],al ;fill the leading byte
1712: inc edi ;step past the leading byte
1713: mov ecx,esi ;ECX = # of whole dwords (REP count)
1714: rep stosd ;fill all whole bytes as dwords
1715: add edi,edx ;point to the next scan line
1716: endm ;-----------------------------------;
1717:
1718: ; N-wide write-only, 1 leading byte, 0 trailing bytes.
1719: ; EAX = fill data: STOSD stores EAX, the leading byte uses AL
1720: ; EBX = count of scans to fill
1721: ; EDX = offset from end of one scan's fill to start of next
1722: ; ESI = # of dwords to fill
1723: ; EDI = target address to fill
1724: ; ECX is overwritten on every scan.
1725: align 4
1726: draw_wide_w_10_loop proc near
1727: UNROLL_LOOP DRAW_WIDE_W_10,W_10,LOOP_UNROLL_COUNT ;presumably repeats the macro body with W_10-prefixed labels (macro defined elsewhere)
1728: dec ebx ;one pass through the unrolled block done
1729: jnz draw_wide_w_10_loop ;go around again until EBX reaches zero
1730:
1731: ret ;return to the address the caller left on the stack
1732:
1733: draw_wide_w_10_loop endp
1734:
1735:
1736: ;-----------------------------------------------------------------------;
1737: ; Macro to draw n write-only bytes, 1 leading byte, 1 trailing byte,
1738: ; then advance to next scan line.
1739: ; Overwrites ECX. Relies on STOSD advancing EDI (direction flag clear).
1740: DRAW_WIDE_W_11 macro ENTRY_LABEL,ENTRY_INDEX
1741: &ENTRY_LABEL&ENTRY_INDEX&:
1742: mov [edi],al ;fill the leading byte
1743: inc edi ;step past the leading byte
1744: mov ecx,esi ;ECX = # of whole dwords (REP count)
1745: rep stosd ;fill whole bytes as dwords
1746: mov [edi],al ;fill the trailing byte
1747: inc edi ;step past the trailing byte
1748: add edi,edx ;point to the next scan line
1749: endm ;-----------------------------------;
1750:
1751: ; N-wide write-only, 1 leading byte, 1 trailing byte.
1752: ; EAX = fill data: STOSD stores EAX, the edge bytes use AL
1753: ; EBX = count of scans to fill
1754: ; EDX = offset from end of one scan's fill to start of next
1755: ; ESI = # of dwords to fill
1756: ; EDI = target address to fill
1757: ; ECX is overwritten on every scan.
1758: align 4
1759: draw_wide_w_11_loop proc near
1760: UNROLL_LOOP DRAW_WIDE_W_11,W_11,LOOP_UNROLL_COUNT ;presumably repeats the macro body with W_11-prefixed labels (macro defined elsewhere)
1761: dec ebx ;one pass through the unrolled block done
1762: jnz draw_wide_w_11_loop ;go around again until EBX reaches zero
1763:
1764: ret ;return to the address the caller left on the stack
1765:
1766: draw_wide_w_11_loop endp
1767:
1768:
1769: ;-----------------------------------------------------------------------;
1770: ; Macro to fill one scan write-only: 1 leading byte, ESI whole dwords,
1771: ; 2 trailing bytes; then advance EDI by EDX to the next scan line.
1772:
1773: DRAW_WIDE_W_12 macro ENTRY_LABEL,ENTRY_INDEX
1774: &ENTRY_LABEL&ENTRY_INDEX&:
1775: mov [edi],al ;fill the leading byte
1776: inc edi ;step past the leading byte
1777: mov ecx,esi ;ECX = # of whole dwords for REP STOSD
1778: rep stosd ;fill the whole dwords with EAX
1779: mov [edi],ax ;fill the trailing word
1780: add edi,2 ;step past the trailing word
1781: add edi,edx ;point to the next scan line
1782: endm ;-----------------------------------;
1783:
1784: ; N-wide write-only fill loop: 1 leading byte, 2 trailing bytes per scan.
1785: ; EAX = value to write (DRAW_WIDE_W_12 stores AL, AX and, via REP STOSD, EAX)
1786: ; EBX = loop count: one DEC per pass through the unrolled body (relation to the scan count depends on UNROLL_LOOP - verify)
1787: ; EDX = offset from end of one scan's fill to start of next
1788: ; ESI = # of dwords to fill
1789: ; EDI = target address to fill
1790:
1791: align 4
1792: draw_wide_w_12_loop proc near
1793: UNROLL_LOOP DRAW_WIDE_W_12,W_12,LOOP_UNROLL_COUNT
1794: dec ebx ;one more pass through the unrolled body done
1795: jnz draw_wide_w_12_loop ;repeat until EBX reaches zero
1796:
1797: ret
1798:
1799: draw_wide_w_12_loop endp
1800:
1801:
1802: ;-----------------------------------------------------------------------;
1803: ; Macro to fill one scan write-only: 1 leading byte, ESI whole dwords,
1804: ; 3 trailing bytes; then advance EDI by EDX to the next scan line.
1805:
1806: DRAW_WIDE_W_13 macro ENTRY_LABEL,ENTRY_INDEX
1807: &ENTRY_LABEL&ENTRY_INDEX&:
1808: mov [edi],al ;fill the leading byte
1809: inc edi ;step past the leading byte
1810: mov ecx,esi ;ECX = # of whole dwords for REP STOSD
1811: rep stosd ;fill the whole dwords with EAX
1812: mov [edi],ax ;fill the first two trailing bytes as a word
1813: mov [edi+2],al ;fill the third trailing byte
1814: add edi,3 ;step past the three trailing bytes
1815: add edi,edx ;point to the next scan line
1816: endm ;-----------------------------------;
1817:
1818: ; N-wide write-only fill loop: 1 leading byte, 3 trailing bytes per scan.
1819: ; EAX = value to write (DRAW_WIDE_W_13 stores AL, AX and, via REP STOSD, EAX)
1820: ; EBX = loop count: one DEC per pass through the unrolled body (relation to the scan count depends on UNROLL_LOOP - verify)
1821: ; EDX = offset from end of one scan's fill to start of next
1822: ; ESI = # of dwords to fill
1823: ; EDI = target address to fill
1824:
1825: align 4
1826: draw_wide_w_13_loop proc near
1827: UNROLL_LOOP DRAW_WIDE_W_13,W_13,LOOP_UNROLL_COUNT
1828: dec ebx ;one more pass through the unrolled body done
1829: jnz draw_wide_w_13_loop ;repeat until EBX reaches zero
1830:
1831: ret
1832:
1833: draw_wide_w_13_loop endp
1834:
1835:
1836: ;-----------------------------------------------------------------------;
1837: ; Macro to fill one scan write-only: 2 leading bytes, ESI whole dwords,
1838: ; 0 trailing bytes; then advance EDI by EDX to the next scan line.
1839:
1840: DRAW_WIDE_W_20 macro ENTRY_LABEL,ENTRY_INDEX
1841: &ENTRY_LABEL&ENTRY_INDEX&:
1842: mov [edi],ax ;fill the leading word
1843: add edi,2 ;step past the leading word
1844: mov ecx,esi ;ECX = # of whole dwords for REP STOSD
1845: rep stosd ;fill the whole dwords with EAX
1846: add edi,edx ;point to the next scan line
1847: endm ;-----------------------------------;
1848:
1849: ; N-wide write-only fill loop: 2 leading bytes, 0 trailing bytes per scan.
1850: ; EAX = value to write (DRAW_WIDE_W_20 stores AX and, via REP STOSD, EAX)
1851: ; EBX = loop count: one DEC per pass through the unrolled body (relation to the scan count depends on UNROLL_LOOP - verify)
1852: ; EDX = offset from end of one scan's fill to start of next
1853: ; ESI = # of dwords to fill
1854: ; EDI = target address to fill
1855:
1856: align 4
1857: draw_wide_w_20_loop proc near
1858: UNROLL_LOOP DRAW_WIDE_W_20,W_20,LOOP_UNROLL_COUNT
1859: dec ebx ;one more pass through the unrolled body done
1860: jnz draw_wide_w_20_loop ;repeat until EBX reaches zero
1861:
1862: ret
1863:
1864: draw_wide_w_20_loop endp
1865:
1866:
1867: ;-----------------------------------------------------------------------;
1868: ; Macro to fill one scan write-only: 2 leading bytes, ESI whole dwords,
1869: ; 1 trailing byte; then advance EDI by EDX to the next scan line.
1870:
1871: DRAW_WIDE_W_21 macro ENTRY_LABEL,ENTRY_INDEX
1872: &ENTRY_LABEL&ENTRY_INDEX&:
1873: mov [edi],ax ;fill the leading word
1874: add edi,2 ;step past the leading word
1875: mov ecx,esi ;ECX = # of whole dwords for REP STOSD
1876: rep stosd ;fill the whole dwords with EAX
1877: mov [edi],al ;fill the trailing byte
1878: inc edi ;step past the trailing byte
1879: add edi,edx ;point to the next scan line
1880: endm ;-----------------------------------;
1881:
1882: ; N-wide write-only fill loop: 2 leading bytes, 1 trailing byte per scan.
1883: ; EAX = value to write (DRAW_WIDE_W_21 stores AL, AX and, via REP STOSD, EAX)
1884: ; EBX = loop count: one DEC per pass through the unrolled body (relation to the scan count depends on UNROLL_LOOP - verify)
1885: ; EDX = offset from end of one scan's fill to start of next
1886: ; ESI = # of dwords to fill
1887: ; EDI = target address to fill
1888:
1889: align 4
1890: draw_wide_w_21_loop proc near
1891: UNROLL_LOOP DRAW_WIDE_W_21,W_21,LOOP_UNROLL_COUNT
1892: dec ebx ;one more pass through the unrolled body done
1893: jnz draw_wide_w_21_loop ;repeat until EBX reaches zero
1894:
1895: ret
1896:
1897: draw_wide_w_21_loop endp
1898:
1899:
1900: ;-----------------------------------------------------------------------;
1901: ; Macro to fill one scan write-only: 2 leading bytes, ESI whole dwords,
1902: ; 2 trailing bytes; then advance EDI by EDX to the next scan line.
1903:
1904: DRAW_WIDE_W_22 macro ENTRY_LABEL,ENTRY_INDEX
1905: &ENTRY_LABEL&ENTRY_INDEX&:
1906: mov [edi],ax ;fill the leading word
1907: add edi,2 ;step past the leading word
1908: mov ecx,esi ;ECX = # of whole dwords for REP STOSD
1909: rep stosd ;fill the whole dwords with EAX
1910: mov [edi],ax ;fill the trailing word
1911: add edi,2 ;step past the trailing word
1912: add edi,edx ;point to the next scan line
1913: endm ;-----------------------------------;
1914:
1915: ; N-wide write-only fill loop: 2 leading bytes, 2 trailing bytes per scan.
1916: ; EAX = value to write (DRAW_WIDE_W_22 stores AX and, via REP STOSD, EAX)
1917: ; EBX = loop count: one DEC per pass through the unrolled body (relation to the scan count depends on UNROLL_LOOP - verify)
1918: ; EDX = offset from end of one scan's fill to start of next
1919: ; ESI = # of dwords to fill
1920: ; EDI = target address to fill
1921:
1922: align 4
1923: draw_wide_w_22_loop proc near
1924: UNROLL_LOOP DRAW_WIDE_W_22,W_22,LOOP_UNROLL_COUNT
1925: dec ebx ;one more pass through the unrolled body done
1926: jnz draw_wide_w_22_loop ;repeat until EBX reaches zero
1927:
1928: ret
1929:
1930: draw_wide_w_22_loop endp
1931:
1932:
1933: ;-----------------------------------------------------------------------;
1934: ; Macro to fill one scan write-only: 2 leading bytes, ESI whole dwords,
1935: ; 3 trailing bytes; then advance EDI by EDX to the next scan line.
1936:
1937: DRAW_WIDE_W_23 macro ENTRY_LABEL,ENTRY_INDEX
1938: &ENTRY_LABEL&ENTRY_INDEX&:
1939: mov [edi],ax ;fill the leading word
1940: add edi,2 ;step past the leading word
1941: mov ecx,esi ;ECX = # of whole dwords for REP STOSD
1942: rep stosd ;fill the whole dwords with EAX
1943: mov [edi],ax ;fill the first two trailing bytes as a word
1944: mov [edi+2],al ;fill the third trailing byte
1945: add edi,3 ;step past the three trailing bytes
1946: add edi,edx ;point to the next scan line
1947: endm ;-----------------------------------;
1948:
1949: ; N-wide write-only fill loop: 2 leading bytes, 3 trailing bytes per scan.
1950: ; EAX = value to write (DRAW_WIDE_W_23 stores AL, AX and, via REP STOSD, EAX)
1951: ; EBX = loop count: one DEC per pass through the unrolled body (relation to the scan count depends on UNROLL_LOOP - verify)
1952: ; EDX = offset from end of one scan's fill to start of next
1953: ; ESI = # of dwords to fill
1954: ; EDI = target address to fill
1955:
1956: align 4
1957: draw_wide_w_23_loop proc near
1958: UNROLL_LOOP DRAW_WIDE_W_23,W_23,LOOP_UNROLL_COUNT
1959: dec ebx ;one more pass through the unrolled body done
1960: jnz draw_wide_w_23_loop ;repeat until EBX reaches zero
1961:
1962: ret
1963:
1964: draw_wide_w_23_loop endp
1965:
1966:
1967: ;-----------------------------------------------------------------------;
1968: ; Macro to fill one scan write-only: 3 leading bytes, ESI whole dwords,
1969: ; 0 trailing bytes; then advance EDI by EDX to the next scan line.
1970:
1971: DRAW_WIDE_W_30 macro ENTRY_LABEL,ENTRY_INDEX
1972: &ENTRY_LABEL&ENTRY_INDEX&:
1973: mov [edi],al ;fill the first leading byte
1974: mov [edi+1],ax ;fill the other two leading bytes as a word
1975: add edi,3 ;step past the three leading bytes
1976: mov ecx,esi ;ECX = # of whole dwords for REP STOSD
1977: rep stosd ;fill the whole dwords with EAX
1978: add edi,edx ;point to the next scan line
1979: endm ;-----------------------------------;
1980:
1981: ; N-wide write-only fill loop: 3 leading bytes, 0 trailing bytes per scan.
1982: ; EAX = value to write (DRAW_WIDE_W_30 stores AL, AX and, via REP STOSD, EAX)
1983: ; EBX = loop count: one DEC per pass through the unrolled body (relation to the scan count depends on UNROLL_LOOP - verify)
1984: ; EDX = offset from end of one scan's fill to start of next
1985: ; ESI = # of dwords to fill
1986: ; EDI = target address to fill
1987:
1988: align 4
1989: draw_wide_w_30_loop proc near
1990: UNROLL_LOOP DRAW_WIDE_W_30,W_30,LOOP_UNROLL_COUNT
1991: dec ebx ;one more pass through the unrolled body done
1992: jnz draw_wide_w_30_loop ;repeat until EBX reaches zero
1993:
1994: ret
1995:
1996: draw_wide_w_30_loop endp
1997:
1998:
1999: ;-----------------------------------------------------------------------;
2000: ; Macro to fill one scan write-only: 3 leading bytes, ESI whole dwords,
2001: ; 1 trailing byte; then advance EDI by EDX to the next scan line.
2002:
2003: DRAW_WIDE_W_31 macro ENTRY_LABEL,ENTRY_INDEX
2004: &ENTRY_LABEL&ENTRY_INDEX&:
2005: mov [edi],al ;fill the first leading byte
2006: mov [edi+1],ax ;fill the other two leading bytes as a word
2007: add edi,3 ;step past the three leading bytes
2008: mov ecx,esi ;ECX = # of whole dwords for REP STOSD
2009: rep stosd ;fill the whole dwords with EAX
2010: mov [edi],al ;fill the trailing byte
2011: inc edi ;step past the trailing byte
2012: add edi,edx ;point to the next scan line
2013: endm ;-----------------------------------;
2014:
2015: ; N-wide write-only fill loop: 3 leading bytes, 1 trailing byte per scan.
2016: ; EAX = value to write (DRAW_WIDE_W_31 stores AL, AX and, via REP STOSD, EAX)
2017: ; EBX = loop count: one DEC per pass through the unrolled body (relation to the scan count depends on UNROLL_LOOP - verify)
2018: ; EDX = offset from end of one scan's fill to start of next
2019: ; ESI = # of dwords to fill
2020: ; EDI = target address to fill
2021:
2022: align 4
2023: draw_wide_w_31_loop proc near
2024: UNROLL_LOOP DRAW_WIDE_W_31,W_31,LOOP_UNROLL_COUNT
2025: dec ebx ;one more pass through the unrolled body done
2026: jnz draw_wide_w_31_loop ;repeat until EBX reaches zero
2027:
2028: ret
2029:
2030: draw_wide_w_31_loop endp
2031:
2032:
2033: ;-----------------------------------------------------------------------;
2034: ; Macro to fill one scan write-only: 3 leading bytes, ESI whole dwords,
2035: ; 2 trailing bytes; then advance EDI by EDX to the next scan line.
2036:
2037: DRAW_WIDE_W_32 macro ENTRY_LABEL,ENTRY_INDEX
2038: &ENTRY_LABEL&ENTRY_INDEX&:
2039: mov [edi],al ;fill the first leading byte
2040: mov [edi+1],ax ;fill the other two leading bytes as a word
2041: add edi,3 ;step past the three leading bytes
2042: mov ecx,esi ;ECX = # of whole dwords for REP STOSD
2043: rep stosd ;fill the whole dwords with EAX
2044: mov [edi],ax ;fill the trailing word
2045: add edi,2 ;step past the trailing word
2046: add edi,edx ;point to the next scan line
2047: endm ;-----------------------------------;
2048:
2049: ; N-wide write-only fill loop: 3 leading bytes, 2 trailing bytes per scan.
2050: ; EAX = value to write (DRAW_WIDE_W_32 stores AL, AX and, via REP STOSD, EAX)
2051: ; EBX = loop count: one DEC per pass through the unrolled body (relation to the scan count depends on UNROLL_LOOP - verify)
2052: ; EDX = offset from end of one scan's fill to start of next
2053: ; ESI = # of dwords to fill
2054: ; EDI = target address to fill
2055:
2056: align 4
2057: draw_wide_w_32_loop proc near
2058: UNROLL_LOOP DRAW_WIDE_W_32,W_32,LOOP_UNROLL_COUNT
2059: dec ebx ;one more pass through the unrolled body done
2060: jnz draw_wide_w_32_loop ;repeat until EBX reaches zero
2061:
2062: ret
2063:
2064: draw_wide_w_32_loop endp
2065:
2066:
2067: ;-----------------------------------------------------------------------;
2068: ; Macro to fill one scan write-only: 3 leading bytes, ESI whole dwords,
2069: ; 3 trailing bytes; then advance EDI by EDX to the next scan line.
2070:
2071: DRAW_WIDE_W_33 macro ENTRY_LABEL,ENTRY_INDEX
2072: &ENTRY_LABEL&ENTRY_INDEX&:
2073: mov [edi],al ;fill the first leading byte
2074: mov [edi+1],ax ;fill the other two leading bytes as a word
2075: add edi,3 ;step past the three leading bytes
2076: mov ecx,esi ;ECX = # of whole dwords for REP STOSD
2077: rep stosd ;fill the whole dwords with EAX
2078: mov [edi],ax ;fill the first two trailing bytes as a word
2079: mov [edi+2],al ;fill the third trailing byte
2080: add edi,3 ;step past the three trailing bytes
2081: add edi,edx ;point to the next scan line
2082: endm ;-----------------------------------;
2083:
2084: ; N-wide write-only fill loop: 3 leading bytes, 3 trailing bytes per scan.
2085: ; EAX = value to write (DRAW_WIDE_W_33 stores AL, AX and, via REP STOSD, EAX)
2086: ; EBX = loop count: one DEC per pass through the unrolled body (relation to the scan count depends on UNROLL_LOOP - verify)
2087: ; EDX = offset from end of one scan's fill to start of next
2088: ; ESI = # of dwords to fill
2089: ; EDI = target address to fill
2090:
2091: align 4
2092: draw_wide_w_33_loop proc near
2093: UNROLL_LOOP DRAW_WIDE_W_33,W_33,LOOP_UNROLL_COUNT
2094: dec ebx ;one more pass through the unrolled body done
2095: jnz draw_wide_w_33_loop ;repeat until EBX reaches zero
2096:
2097: ret
2098:
2099: draw_wide_w_33_loop endp
2100:
2101: end
2102:
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.