|
|
1.1 root 1: ;-----------------------------------------------------------------------;
2:
3: ; Set LOOP_UNROLL_SHIFT to the log2 of the number of times you want loops in
4: ; this module unrolled. For example, LOOP_UNROLL_SHIFT of 3 yields 2**3 = 8
5: ; times unrolling. This is the only thing you need to change to control
6: ; unrolling.
7:
8: LOOP_UNROLL_SHIFT equ 2
9:
10: ;-----------------------------------------------------------------------;
11:
12: ; SLOW_OUT: word OUT of AX to the port in DX, preceded by a short delay.
13: ; This delay is necessitated by a bug in the ATI Ultra when running in
14: ; VGA mode.
15: SLOW_OUT macro
16: push ecx ;delay only: this PUSH/POP pair puts ECX back to
17: pop ecx ; the value it had before the port write
18: out dx,ax ;16-bit port write of AX to the port in DX
19: endm
20:
21: .386
22:
23: ifndef DOS_PLATFORM
24: .model small,c
25: else
26: ifdef STD_CALL
27: .model small,c
28: else
29: .model small,pascal
30: endif; STD_CALL
31: endif; DOS_PLATFORM
32:
33: assume cs:FLAT,ds:FLAT,es:FLAT,ss:FLAT
34: assume fs:nothing,gs:nothing
35:
36: .xlist
37: include stdcall.inc ;calling convention cmacros
38: include i386\egavga.inc
39: include i386\strucs.inc
40: include i386\unroll.inc
41: include i386\ropdefs.inc
42: include i386\display.inc ; Display specific structures
43:
44: .list
45:
46: ;-----------------------------------------------------------------------;
47:
48: .data
49:
50: ;
51: ; We share some tables with vgablts.asm
52: ;
53:
54: extrn jALUFuncTable :byte
55: extrn jLeftMask :byte
56: extrn jRightMask :byte
57: extrn jForceOnTable :byte
58: extrn jNotTable :byte
59: extrn jInvertDest :byte
60: extrn jForceOffTable :byte
61: extrn vTrgBlt@20 :dword
62:
63: ;-----------------------------------------------------------------------;
64: ; Table of routines to be called to draw edges. The index is built while the
65: ; masks are set up: bit 0 = 1 if left edge partial, bit 1 = 1 if right edge whole.
66: align 4
67: public pfnEdgeDrawing
68: pfnEdgeDrawing label dword
69: dd edge_byte_setup ;0: left edge whole, right edge partial or absent
70: dd edge_byte_setup ;1: left edge partial, right edge partial or absent
71: dd check_next_bank ;2: both edges whole, so there are no edges to draw
72: dd edge_byte_setup ;3: left edge partial, right edge whole
73:
74: ;-----------------------------------------------------------------------;
75: ; Table of pointers to tables used to find entry points in unrolled wide
76: ; whole byte code. Index = (# of odd leading bytes * 2) + # of odd trailing
77: ; bytes, where each count is 0 or 1.
78: align 4
79: public pfnWideWholeRep
80: pfnWideWholeRep label dword
81: dd pfnDrawWide00Entry ;no leading byte, no trailing byte
82: dd pfnDrawWide01Entry ;no leading byte, one trailing byte
83: dd pfnDrawWide10Entry ;one leading byte, no trailing byte
84: dd pfnDrawWide11Entry ;one leading byte, one trailing byte
85:
86: ;-----------------------------------------------------------------------;
87: ; Table of pointers to tables used to find entry points in narrow, special-
88: ; cased unrolled replace whole byte code.
89: ; The table is indexed by the width of the fill in whole bytes.
90: ; Note: The breakpoint where one should switch from special-casing to
91: ; REP STOS is purely a guess on my part. 8 seemed reasonable.
92:
93: ; Start address MOD 2 is 0.
94: align 4
95: public pfnWholeBytesMod0Entries
96: pfnWholeBytesMod0Entries label dword
97: dd 0 ;we never get a 0-wide case
98: dd pfnDraw1WideEvenEntry ;1 whole byte wide
99: dd pfnDraw2WideEvenEntry ;2 whole bytes wide
100: dd pfnDraw3WideEvenEntry ;3 whole bytes wide
101: dd pfnDraw4WideEvenEntry ;4 whole bytes wide
102: dd pfnDraw5WideEvenEntry ;5 whole bytes wide
103: dd pfnDraw6WideEvenEntry ;6 whole bytes wide
104: dd pfnDraw7WideEvenEntry ;7 whole bytes wide
105: dd pfnDraw8WideEvenEntry ;8 whole bytes wide
106: MAX_REPLACE_SPECIAL equ ($-pfnWholeBytesMod0Entries)/4 ;# of dword entries above (9); widths below this are special-cased
107:
108: ; Start address MOD 2 is 1. Indexed by the width of the fill in whole bytes.
109: align 4
110: public pfnWholeBytesMod1Entries
111: pfnWholeBytesMod1Entries label dword
112: dd 0 ;we never get a 0-wide case
113: dd pfnDraw1WideOddEntry ;1 whole byte wide
114: dd pfnDraw2WideOddEntry ;2 whole bytes wide
115: dd pfnDraw3WideOddEntry ;3 whole bytes wide
116: dd pfnDraw4WideOddEntry ;4 whole bytes wide
117: dd pfnDraw5WideOddEntry ;5 whole bytes wide
118: dd pfnDraw6WideOddEntry ;6 whole bytes wide
119: dd pfnDraw7WideOddEntry ;7 whole bytes wide
120: dd pfnDraw8WideOddEntry ;8 whole bytes wide
121:
122:
123: ;-----------------------------------------------------------------------;
124: ; Table of pointers to tables used to find entry points in narrow, special-
125: ; cased unrolled non-replace whole byte code.
126: ; The table is indexed by the width of the fill in whole bytes.
127: ; Note: The breakpoint where one should switch from special-casing to
128: ; REP MOVSB is purely a guess on my part. 5 seemed reasonable, so widths
129: ; 1 through 4 are special-cased and widths of 5 or more are not.
130: align 4
131: pfnWholeBytesNonReplace label dword
132: dd 0 ;we never get a 0-wide case
133: dd pfnDraw1RWEntry ;1 whole byte wide
134: dd pfnDraw2RWEntry ;2 whole bytes wide
135: dd pfnDraw3RWEntry ;3 whole bytes wide
136: dd pfnDraw4RWEntry ;4 whole bytes wide
137: MAX_NON_REPLACE_SPECIAL equ ($-pfnWholeBytesNonReplace)/4 ;# of dword entries above (5); widths below this are special-cased
138:
139: ; Master MOD 2 alignment look-up table for entry tables for two possible
140: ; alignments for narrow, special-cased unrolled replace whole byte code.
141: align 4
142: public pfnWholeBytesSpecial
143: pfnWholeBytesSpecial label dword
144: dd pfnWholeBytesMod0Entries ;offset of first whole byte MOD 2 is 0
145: dd pfnWholeBytesMod1Entries ;offset of first whole byte MOD 2 is 1
146:
147: .code
148:
149: ;=============================================================================
150:
151: _TEXT$01 SEGMENT DWORD USE32 PUBLIC 'CODE'
152: ASSUME CS:FLAT, DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING
153: ; vMonoPatBlt: fills a list of rectangles with a two-color (mono) pattern brush.
154: cProc vMonoPatBlt,24,< \
155: uses esi edi ebx, \
156: pdsurf: ptr DEVSURF, \
157: culRcl: dword, \
158: prcl: ptr RECTL, \
159: ulMix: dword, \
160: pBrush: ptr oem_brush_def, \
161: pBrushOrg: ptr POINTL >
162: ; Only the low byte of ulMix (the foreground mix) is looked at by this routine.
163: local ulRowOffset :dword ;Offset from start of scan line to
164: ; first byte to fill
165: local ulWholeBytes :dword ;# of whole bytes to fill
166: local ulWholeWords :dword ;# of whole words to fill excluding
167: ;leading and/or trailing bytes
168: local pfnWholeFn :dword ;pointer to routine used to draw
169: ; whole bytes
170: local ulScanWidth :dword ;offset from start of one scan to start
171: ; of next
172: local ulNextScan :dword ;offset from end of one scan line's
173: ; fill to start of next
174: local ulCurrentTopScan :dword ;top scan line to fill in current bank
175: local ulMasks :dword ;low byte (AL) = left mask, next byte
176: ; (AH) = right mask
177: local ulBottomScan :dword ;bottom scan line of fill rectangle
178:
179: local jALUFunc :dword ;VGA ALU logical operation (SET, AND,
180: ; OR, or XOR); only the low byte is used
181: local pfnStartDrawing :dword ;pointer to function to call to start
182: ; drawing
183: local pfnContinueDrawing :dword ;pointer to function to call to
184: ; continue drawing after doing whole
185: ; bytes
186: local ulLeftEdgeAdjust :dword ;used to bump the whole bytes start
187: ; address past the left edge when the
188: ; left edge is partial
189: local pfnWholeBytes :dword ;pointer to table of entry points
190: ; into unrolled loops for whole byte
191: ; filling
192: local ulSpecialBytes ;If we are doing a narrow special-case
193: ; fill, this will be the width of the
194: ; fill in bytes (0 otherwise). We need
195: ; this so we can properly increment to the next line.
196: local ulVbNextScan :dword ;Scan width shifted left by the brush
197: ; y shift: distance between the scan
198: ; lines drawn by one venetian blind pass
199: local fdInvertDestFirst :dword;1 if the rop requires a pass to invert
200: ; the destination before the normal
201: ; pass
202:
203: local ulPatternOrgY: dword ;Local copy of the pattern offset Y
204:
205: local ulVbBlindCount :dword ;# of venetian blind passes in this bank: min(pattern height, scans to fill)
206:
207: local ulVbTopScan :dword ;scans left to cover, counted from the current pass's first scan
208:
209: local ulVbStartScan :dword ;Current top slat (presumably; not referenced in the code reviewed here)
210:
211: local pUlVbPattern:dword ;inner loop pattern pointer (next pattern word to use)
212:
213: local pUlPattern:dword ;current pattern with proper Y offset
214:
215: local ulVbMask ;Inversion mask for partial edges
216:
217: local ulVbYRound ;brush height - 1; added to a scan count to round up before shifting
218:
219: local ulVbYShift ;brush y shift count (from oem_brush_yshft), used through CL
220:
221: local RotatedPat[32]:byte ;Aligned pattern buffer: two back-to-back 16-byte copies of the pattern
222:
223: local ulFgClr:dword ;Local copy of the foreground color
224:
225: local ulBkClr:dword ;Local copy of the background color
226:
227: local pfnWesTrick:dword ;Pointer to the desired inner loop
228: ; wes trick code. While we are doing
229: ; a ROP to full bytes, this will point
230: ; to do_wide_wes_trick otherwise it
231: ; will point to do_edge_wes_trick for
232: ; the edge cases
233: cld
234:
235: ;-----------------------------------------------------------------------;
236: ; Make sure there's something to draw; clip enumerations can be empty.
237: ;-----------------------------------------------------------------------;
238:
239: cmp culRcl,0 ;any rects to fill?
240: jz vMonoPatBlts_done ;no, we're done
241:
242: mov esi,pBrush ;point to the brush
243:
244: xor eax,eax ;so the byte loads below give zero-extended dwords
245: mov al,[esi + oem_brush_fg]
246: mov ulFgClr,eax ;Make local copy of the fg color
247:
248: mov al,[esi + oem_brush_bg]
249: mov ulBkClr,eax ;Make local copy of the bk color
250:
251: ;-----------------------------------------------------------------------;
252: ; Set up for the desired raster op.
253: ;-----------------------------------------------------------------------;
254: sub ebx,ebx ;ignore any background mix; we're only
255: mov bl,byte ptr ulMix ; concerned with the foreground in this
256: ; module
257: cmp ebx,R2_NOP ;is this NOP?
258: jz vMonoPatBlts_done ;yes, we're done
259: sub eax,eax ;we want a dword
260: mov al,jInvertDest[ebx] ;remember whether we need to invert the
261: mov fdInvertDestFirst,eax ; destination before finishing the rop
262: mov eax,ulFgClr
263: and al,jForceOffTable[ebx] ;force color to 0 if necessary
264: ; (R2_BLACK)
265: or al,jForceOnTable[ebx] ;force color to 0ffh if necessary
266: ; (R2_WHITE, R2_NOT)
267: xor al,jNotTable[ebx] ;invert color if necessary (any Pn mix)
268: ;at this point, AL has the foreground
269: ; color we want to draw with; save it
270: ; back over the local copy
271: mov ulFgClr,eax
272:
273: mov eax,ulBkClr
274: and al,jForceOffTable[ebx] ;force color to 0 if necessary
275: ; (R2_BLACK)
276: or al,jForceOnTable[ebx] ;force color to 0ffh if necessary
277: ; (R2_WHITE, R2_NOT)
278: xor al,jNotTable[ebx] ;invert color if necessary (any Pn mix)
279: ;at this point, AL has the background
280: ; color we want to draw with; save it
281: ; back over the local copy
282: mov ulBkClr,eax
283:
284: mov ah,jALUFuncTable[ebx] ;get the ALU logical function
285: and ah,ah ;is the logical function DR_SET?
286: .errnz DR_SET
287: jz short skip_ALU_set ;yes, don't have to set because that's
288: ; the VGA's default state
289: mov edx,VGA_BASE + GRAF_ADDR
290: mov al,GRAF_DATA_ROT
291: SLOW_OUT ;set the ALU logical function
292: skip_ALU_set:
293: mov byte ptr jALUFunc,ah ;remember the ALU logical function
294:
295: ;-----------------------------------------------------------------------;
296: ; Set up variables that are constant for the entire time we're in this
297: ; module.
298: ;-----------------------------------------------------------------------;
299: mov edx,pBrushOrg ;point to the brush origin
300:
301: mov ecx,[edx].ptl_x
302: and ecx,15 ;ECX = brush origin x mod 16 (rotate count used below)
303:
304: mov eax,[edx].ptl_y
305: mov ulPatternOrgY,eax ;local copy of the brush origin y
306:
307: ;We are now going to make a local copy of the pattern, rotated by the
308: ;x origin. The reason that we do this is because we may be called with
309: ;several rectangles and we don't really want to rotate the pattern data
310: ;for each rectangle. We store the pattern twice, back to back (double
311: ;high), so that we can incorporate our Y offset later without having to
312: ;worry about running off the end of the pattern.
313:
314: lea edi,RotatedPat ;Pattern Dest
315: mov esi,[esi + oem_brush_pmono] ;Pattern Src
316: or cl,cl ;any rotation needed?
317: jnz rotate_and_expand ;yes, rotate each row while copying
318:
319: INDEX=0
320: rept 4 ;patterns are 16x8: 4 dwords = 16 bytes
321: mov eax,[esi+INDEX]
322: mov [edi+INDEX],eax ;first copy
323: mov [edi+16+INDEX],eax ;second copy, 16 bytes further on
324: INDEX=INDEX+4
325: endm ;-----------------
326: jmp fill_rect_loop
327:
328: rotate_and_expand:
329: INDEX=0
330: rept 8 ;patterns are 16x8: one 2-byte row per pass
331: mov ah,[esi+INDEX] ;load bytes for shift
332: mov al,[esi+1+INDEX] ;convert from little to big endian
333: ror ax,cl ;shift into position
334: mov [edi+INDEX],ah ;save result
335: mov [edi+1+INDEX],al
336: mov [edi+16+INDEX],ah ;save result to second copy
337: mov [edi+17+INDEX],al
338: INDEX=INDEX+2
339: endm ;-----------------
340:
341:
342: fill_rect_loop:
343: ;-----------------------------------------------------------------------;
344: ; Set up masks and widths.
345: ;-----------------------------------------------------------------------;
346: mov edi,prcl ;point to rectangle to fill
347:
348: sub eax,eax
349: mov ulLeftEdgeAdjust,eax ;initialize variable
350: mov ulSpecialBytes,eax ;initialize variable
351:
352: mov eax,[edi].yBottom
353: mov ulBottomScan,eax ;remember the bottom scan line of fill
354:
355: mov ebx,[edi].xRight ;right edge of fill (non-inclusive)
356: mov ecx,ebx
357: and ecx,0111b ;intrabyte address of right edge
358: mov ah,jRightMask[ecx] ;right edge mask
359:
360: mov esi,[edi].xLeft ;left edge of fill (inclusive)
361: mov ecx,esi
362: shr ecx,3 ;/8 for start offset from left edge
363: ; of scan line
364: mov ulRowOffset,ecx ;remember offset from start of scan
365: ; line
366: sub ebx,esi ;width in pixels of fill
367:
368: and esi,0111b ;intrabyte address of left edge
369: mov al,jLeftMask[esi] ;left edge mask
370:
371: dec ebx ;make inclusive on right
372: add ebx,esi ;inclusive width, starting counting at
373: ; the beginning of the left edge byte
374: shr ebx,3 ;width of fill in bytes touched - 1
375: jnz short more_than_1_byte ;more than 1 byte is involved
376:
377: ; Only one byte will be affected. Combine first/last masks.
378:
379: and al,ah ;we'll use first byte mask only
380: xor ah,ah ;want last byte mask to be 0
381: inc ebx ;so there's one count to subtract below
382: ; if this isn't a whole edge byte
383: more_than_1_byte:
384:
385: ; If all pixels in the left edge are altered, combine the first byte into the
386: ; whole byte count and clear the first byte mask, because we can handle solid
387: ; edge bytes faster as part of the whole bytes. Ditto for the right edge.
388:
389: sub ecx,ecx ;edge whole-status accumulator
390: cmp al,-1 ;is left edge a whole byte or partial?
391: adc ecx,ecx ;ECX=1 if left edge partial, 0 if whole
392: sub ebx,ecx ;if left edge partial, deduct it from
393: ; the whole bytes count
394: mov ulLeftEdgeAdjust,ecx ;for skipping over the left edge if
395: ; it's partial when pointing to the
396: ; whole bytes
397: and ah,ah ;is right edge mask 0, meaning this
398: ; fill is only 1 byte wide?
399: jz short save_masks ;yes, no need to do anything
400: cmp ah,-1 ;is right edge a whole byte or partial?
401: jnz short save_masks ;partial
402: mov ah,0 ;whole: clear the right mask so no right edge is drawn
403: add ecx,2 ;set bit 1 of ECX: right edge is whole
404: ; (bit 1 = 0 if right edge partial or absent;
405: ; bit 0 = 1 if left edge partial, 0 if whole)
406: inc ebx ;if right edge whole, include it in the
407: ; whole bytes count
408: save_masks:
409: mov ulMasks,eax ;save left (AL) and right (AH) clip masks
410: mov ulWholeBytes,ebx ;save # of whole bytes
411:
412: mov ecx,pfnEdgeDrawing[ecx*4] ;set address of routine to draw
413: mov pfnContinueDrawing,ecx ; all partial (non-whole) edges
414:
415: and ebx,ebx ;any whole bytes?
416: jz start_vec_set ;no, so start drawing with the edge
417: ; routine that is still in ECX
418: ;yes, so draw the whole bytes before the edge bytes
419:
420: ; The whole bytes loop depends on the type of operation being done. If the
421: ; operation is one which uses DR_SET, then we can use a STOS-type operation,
422: ; else we have to use a MOVSB-type operation (to load the latches with the
423: ; existing contents of display memory to allow the ALUs to work).
424: cmp byte ptr jALUFunc,DR_SET ;is it a replace-type rop?
425: jz short is_replace_type ;yes
426: ;no, set up for non-replace whole bytes
427: mov ecx,offset non_replace_wide ;routine to start drawing with
428:
429: cmp ebx,MAX_NON_REPLACE_SPECIAL ;too wide to special case?
430: jb short non_replace_spec ;nope
431:
432: lea eax,pfnDrawRWWideEntry ;too wide to special-case; use the
433: mov pfnWholeBytes,eax ; wide non-replace entry table
434:
435: jmp short start_vec_set
436:
437: align 4
438: non_replace_spec:
439:
440: mov eax,pfnWholeBytesNonReplace[ebx*4] ;no, point to entry
441: mov pfnWholeBytes,eax ; table for width
442: mov ulSpecialBytes,ebx ;remember the special-case width
443: ;narrow enough to special case. The
444: ; entry table is looked up by width
445: ; only; there is no alignment look-up here
446:
447: jmp short start_vec_set
448:
449: align 4
450: is_replace_type: ;set up for replace-type rop
451: cmp ebx,MAX_REPLACE_SPECIAL ;too wide to special case?
452: jnb short is_wide_replace ;yes
453:
454: mov ulSpecialBytes,ebx ;remember the special-case width
455: ;narrow enough to special case. Look up
456: ; the entry table for the special case
457: ; based on the start alignment
458: mov ecx,ulRowOffset
459: add ecx,ulLeftEdgeAdjust ;left edge whole bytes start offset
460: and ecx,01b ;left edge whole bytes start alignment
461: ; MOD 2
462: mov ecx,pfnWholeBytesSpecial[ecx*4] ;look up table of entry
463: ; tables for alignment
464: mov ecx,[ecx+ebx*4] ;look up entry table for width
465: mov pfnWholeBytes,ecx ; table for width
466: mov ecx,offset whole_bytes_rep_wide ;routine to start drawing with
467:
468: jmp short start_vec_set
469:
470: align 4
471: is_wide_replace: ;set up for wide replace-type op
472: ;Note: assumes there is at least one
473: ; full word involved!
474: mov ecx,ulRowOffset
475: add ecx,ulLeftEdgeAdjust ;left edge whole bytes start offset
476: neg ecx
477: and ecx,01b ;ECX = 1 if the start offset is odd (one leading byte), else 0
478: mov edx,ebx
479: sub edx,ecx ;ignore odd leading bytes
480: mov eax,edx
481: shr edx,1 ;# of whole words across (not counting
482: ; odd leading & trailing bytes)
483: mov ulWholeWords,edx
484: and eax,01b ;# of odd (fractional) trailing bytes
485: add ecx,ecx ;leading count goes in bit 1 of the index
486: or ecx,eax ;build a look-up index from the number
487: ; of leading and trailing bytes
488: mov ecx,pfnWideWholeRep[ecx*4] ;proper drawing handler for front/
489: mov pfnWholeBytes,ecx ; back alignment
490: mov ecx,offset whole_bytes_rep_wide
491: ;set up to call routine to perform wide
492: ; whole bytes fill
493:
494: start_vec_set:
495: mov pfnStartDrawing,ecx ;routine that starts drawing in each bank: the
496: ; whole bytes routine, or the edge routine when there are no whole bytes
497: mov ecx,pdsurf
498: mov eax,[ecx].dsurf_lNextScan
499: mov ulScanWidth,eax ;local copy of scan line width
500: sub eax,ebx ;EAX = scan width - # of whole bytes
501: mov ulNextScan,eax ; = delta from end of fill to next scan
502:
503: mov esi,pBrush
504: mov eax,[esi+oem_brush_height]
505: dec eax
506: mov ulVbYRound,eax ;brush height - 1, used to round scan counts up
507: mov al,[esi + oem_brush_yshft] ;AL = brush y shift count; bits 8-31 of EAX still come from height - 1
508: mov ulVbYShift,eax ;the code reviewed here only ever uses the low byte (through CL)
509:
510: mov cl,al
511: mov eax,UlScanWidth
512: shl eax,cl ;scan width << y shift: distance from one
513: mov ulVbNextScan,eax ; venetian blind line to the next
514:
515: cmp fdInvertDestFirst,1 ;is this an invert-dest-plus-something-
516: ; else rop that requires two passes?
517: jnz short do_single_pass
518: ; Invert this one rectangle only: the count passed is 1, not culRcl.
519: lea eax,vTrgBlt@20
520: ptrCall <eax>,<pdsurf, 1, prcl, R2_NOT, -1>
521: ; (We get here once per rectangle, so passing culRcl would invert every later rectangle again on each trip.)
522: mov ah,byte ptr jALUFunc ;reset the ALU logical function
523: mov edx,VGA_BASE + GRAF_ADDR
524: mov al,GRAF_DATA_ROT
525: SLOW_OUT ;set the ALU logical function
526:
527: do_single_pass:
528: call draw_banks ;fill this rectangle in every bank it touches
529:
530: ;-----------------------------------------------------------------------;
531: ; See if there are any more rectangles to fill.
532: ;-----------------------------------------------------------------------;
533:
534: add prcl,(size RECTL) ;point to the next rectangle, if there is one
535: dec culRcl ;count down the rectangles to fill
536: jnz fill_rect_loop
537:
538:
539: ;-----------------------------------------------------------------------;
540: ; We have filled all rectangles. Restore the VGA to its default state.
541: ; The early exits (no rectangles, R2_NOP) jump past this to vMonoPatBlts_done.
542:
543: mov edx,VGA_BASE + GRAF_ADDR
544: mov eax,0000h + GRAF_ENAB_SR ;disable set/reset
545: out dx,ax
546: mov eax,GRAF_MODE + ((M_PROC_WRITE + M_DATA_READ) SHL 8)
547: out dx,ax ;restore read mode 0 and write mode 0
548: mov eax,(DR_SET shl 8) + GRAF_DATA_ROT ;set the logical function to
549: out dx,ax ; SET
550: vMonoPatBlts_done:
551: cRet vMonoPatBlt
552:
553: ;-----------------------------------------------------------------------;
554: ; Fills all banks in the current fill rectangle (the one at prcl). Entered
555: ; by CALL; control leaves through the PLAIN_RET at banks_done.
556: ;-----------------------------------------------------------------------;
557:
558: align 4
559: draw_banks:
560:
561: ;-----------------------------------------------------------------------;
562: ; Map in the bank containing the top scan to fill, if it's not mapped in
563: ; already.
564: ;-----------------------------------------------------------------------;
565:
566: mov edi,prcl ;point to rectangle to fill
567: mov ecx,pdsurf ;point to surface
568: mov eax,[edi].yTop ;top scan line of fill
569: mov ulCurrentTopScan,eax ;this will be the fill top in 1st bank
570:
571: cmp eax,[ecx].dsurf_rcl1WindowClip.yTop ;is fill top less than
572: ; current bank?
573: jl short map_init_bank ;yes, map in proper bank
574: cmp eax,[ecx].dsurf_rcl1WindowClip.yBottom ;fill top greater than
575: ; current bank?
576: jl short init_bank_mapped ;no, proper bank already mapped
577: map_init_bank:
578:
579: ; Map in the bank containing the top scan line of the fill.
580:
581: ptrCall <dword ptr [ecx].dsurf_pfnBankControl>,<ecx,eax,JustifyTop>
582:
583: init_bank_mapped:
584:
585: ;-----------------------------------------------------------------------;
586: ; Main loop for processing fill in each bank.
587: ;-----------------------------------------------------------------------;
588:
589: ; Compute the starting address and scan line count for the initial bank.
590:
591: mov eax,pdsurf ;EAX->target surface
592: mov ebx,ulBottomScan ;bottom of destination rectangle
593: cmp ebx,[eax].dsurf_rcl1WindowClip.yBottom
594: ;which comes first, the bottom of the
595: ; dest rect or the bottom of the
596: ; current bank?
597: jl short BottomScanSet ;fill bottom comes first, so draw to
598: ; that; this is the last bank in fill
599: mov ebx,[eax].dsurf_rcl1WindowClip.yBottom
600: ;bank bottom comes first; draw to
601: ; bottom of bank
602: BottomScanSet:
603: mov edi,ulCurrentTopScan ;top scan line to fill in current bank
604: sub ebx,edi ;# of scans to fill in bank
605: imul edi,ulScanWidth ;offset of starting scan line
606:
607: ; Note that the start of the bitmap will change each time through the
608: ; bank loop, because the start of the bitmap is varied to map the
609: ; desired scan line to the banking window.
610:
611: add edi,[eax].dsurf_pvBitmapStart ;start of scan in bitmap
612: add edi,ulRowOffset ;EDI = start offset of fill in bitmap
613:
614: ; We have computed the starting address and scan count. Time to start drawing
615: ; in the initial bank.
616:
617: mov esi,pBrush ;edx = min(PatternHeight,BltHeight)
618: mov ecx,[esi + oem_brush_height]
619: sub ecx,ebx ;ECX = height - scans; borrow if height < scans
620: sbb edx,edx ;EDX = -1 if there was a borrow, else 0
621: and edx,ecx ;EDX = height - scans if height < scans, else 0
622: add edx,ebx ;EDX = height if height < scans, else scans
623: mov ulVbBlindCount,edx ;# of venetian blind passes for this bank
624:
625: ; Brush alignment: pick the pattern row that lines up with the top scan,
626: ; using (top scan - brush origin y) mod 8.
627: mov eax,ulCurrentTopScan ;top scan line to fill in current bank
628: sub eax,ulPatternOrgY ;EAX = top scan - brush origin y
629:
630: jns short pos_y_offset ;not negative: just take it mod 8
631: neg eax ;negative: work from the magnitude
632: and eax,7 ;-eax mod 8
633: neg eax ;
634: add eax,8 ;EAX = 8 - (magnitude mod 8), so 1 through 8;
635: jmp short save_pat_pointer ; 8 starts at the second pattern copy
636: pos_y_offset:
637: and eax,7 ;eax mod 8
638: save_pat_pointer:
639: add eax,eax ;Y Offset * PatternWidth (2 bytes)
640:
641: lea edx,RotatedPat ;start of the rotated pattern copies
642: add eax,edx
643: mov pulPattern,eax ;Drawing code uses this as the
644: ;source for the pattern
645:
646: jmp pfnStartDrawing ;whole bytes routine, or edges if no whole bytes
647:
648:
649: ;-----------------------------------------------------------------------;
650: ; Whole byte fills.
651: ;-----------------------------------------------------------------------;
652:
653: ;-----------------------------------------------------------------------;
654: ; Handles replace-type (DR_SET) whole byte fills, both the narrow
655: ; special-cased widths and the wide case.
656: ;
657: ; The destination is not involved, so a STOS (or equivalent) can be used
658: ; (no read needed before write).
659: ;-----------------------------------------------------------------------;
660: ; On entry: EBX = # of scans to fill in this bank, EDI --> first byte of fill.
661: align 4
662: public whole_bytes_rep_wide
663: whole_bytes_rep_wide:
664: push ebx ;save scan count
665: push edi ;save starting address
666:
667: add edi,ulLeftEdgeAdjust ;point to first whole byte to fill
668:
669: mov edx,VGA_BASE + GRAF_ADDR
670: mov eax,GRAF_MODE + ((M_COLOR_WRITE + M_COLOR_READ) SHL 8)
671: out dx,ax ;write mode 2
672: mov eax,ulBkClr ;load the latches with our background
673: mov [edi],al ; color: write it to the first byte
674: mov al,[edi] ; in write mode 2, then read it back
675:
676: mov al,GRAF_SET_RESET ;Set the foreground color
677: out dx,al ; into set/reset
678: inc edx ;point to the data port
679: in al,dx ;read the current value
680: and eax,0f0h ;keep only bits 4-7 of it
681: or eax,ulFgClr ;merge in the foreground color
682: out dx,al
683: dec edx ;back to the index port
684:
685: mov eax,GRAF_MODE + ((M_AND_WRITE + M_COLOR_READ) SHL 8)
686: out dx,ax ;write mode 3 so we can do the masking
687: ; without OUTs, read mode 1 so we can
688: ; read 0xFF from memory always, for
689: ; ANDing (because Color Don't Care is
690: ; all zeros)
691:
692: mov esi,pulPattern ; pointer to pattern bits
693: mov ax,[esi] ;AX = pattern word for the first pass
694: add esi,2
695: mov pulVbPattern,esi ;remember where the next pass's word is
696:
697:
698: mov ulVbTopScan,ebx ;remember the scan count for this bank
699: add ebx,ulVbYRound ;Calc the number of lines to do:
700: mov ecx,ulVbyShift
701: shr ebx,cl ; (scans + brush height - 1) >> y shift.
702: ; Rounding up here is what picks up any
703: ; partial pattern at the bottom of
704: ; the fill.
705: mov edx,pfnWholeBytes
706: push ulVbBlindCount ;the loop below counts this local down
707:
708: public wide_bytes_loop
709: wide_bytes_loop:
710: SET_UP_UNROLL_VARS ebx, ecx, ebx, [edx], LOOP_UNROLL_SHIFT
711:
712: mov esi,ulWholeWords ;number of aligned word writes
713: mov edx,ulVbNextScan ;offset from end of one scan line to
714: ; start of the same scan line
715: ; in the next pattern.
716: sub edx,ulWholeBytes
717: add edx,ulSpecialBytes ;(equals ulWholeBytes for narrow special-case fills, else 0)
718:
719:
720: ; eax = rotated pattern
721: ; ebx = unrolled count
722: ; ecx = routine address
723: ; edx = ulVbNextScan, adjusted as above
724: ; esi = ulWholeWords
725: ; edi = pDest
726: ;
727: push edi ;save our dest pointer
728: call ecx ;draw the wide whole bytes
729: pop edi ;restore our dest pointer
730:
731: add edi,ulScanWidth ;advance to next scan line
732:
733: dec ulVbBlindCount ;one more venetian blind pass done
734: jz short wide_bytes_end ;that was the last pass
735:
736: mov eax,ulVbTopScan ;restore scan count
737: dec eax ;Subtract off completed top line
738: mov ulVbTopScan,eax
739: add eax,ulVbYRound ;Calc the number of lines to do
740: mov ecx,ulVbyShift
741: shr eax,cl ;for this venetian blind pass
742: mov ebx,eax ;including any partial patterns
743: ; at the bottom
744:
745: mov esi,pulVbPattern ;Pattern data
746: mov ax,[esi] ;get pattern word
747: add esi,2
748: mov pulVbPattern,esi ;save pattern pointer for later
749:
750: mov edx,pfnWholeBytes
751:
752: jmp short wide_bytes_loop
753:
754: wide_bytes_end:
755: pop ulVbBlindCount ;restore the pass count saved before the loop
756: pop edi ;restore screen pointer
757: pop ebx ;restore fill scan count
758:
759: mov edx,VGA_BASE + GRAF_ADDR ;restore proper read/write modes
760: mov eax,GRAF_MODE + ((M_PROC_WRITE + M_DATA_READ) SHL 8)
761: out dx,ax
762:
763: jmp pfnContinueDrawing ;either keep drawing or we're done
764:
765:
766: ;-----------------------------------------------------------------------;
767: ; Handles non-replace (ALU function other than DR_SET) whole byte fills.
768: ;-----------------------------------------------------------------------;
769: ; On entry: EBX = # of scans to fill in this bank, EDI --> first byte of fill.
770: align 4
771: public non_replace_wide
772: non_replace_wide:
773: push ebx ;Save line count
774: push edi ;Save Dest Addr
775:
776: add edi,ulLeftEdgeAdjust ;point to first whole byte to fill
777:
778: lea eax,do_wide_wes_trick ;wes_trick calls through pfnWesTrick
779: mov pfnWesTrick,eax
780:
781: mov ecx,ulFgClr
782: xor ecx,ulBkClr ;mask = ulBkClr ^ ulFgClr
783:
784: mov ah,cl ;sre = !mask
785: not ah ;Set/Reset Enable
786: mov edx,EGA_BASE+GRAF_ADDR
787: mov al,GRAF_ENAB_SR
788: out dx,ax ;Set Set/Reset Enable bits
789:
790: mov ah,byte ptr ulBkClr ;Set/Reset = background color
791: mov al,GRAF_SET_RESET
792: out dx,ax
793:
794: mov eax,GRAF_MODE + ((M_PROC_WRITE + M_COLOR_READ) SHL 8)
795: out dx,ax ;NOTE(review): this comment used to say "Set Read Mode 0", but the constant is M_COLOR_READ, which this file elsewhere calls read mode 1 - confirm
796:
797: ;save the width count and pfn here
798:
799: call wes_trick
800:
801: mov edx,EGA_BASE+SEQ_DATA ;sequencer data port (presumably still indexed to the plane mask)
802: mov eax,0fh
803: out dx,al ;write 0fh to it
804:
805: mov edx,EGA_BASE+GRAF_ADDR
806: mov eax,GRAF_MODE + ((M_PROC_WRITE + M_DATA_READ) SHL 8)
807: out dx,ax ;restore proper read/write modes
808:
809: mov eax,GRAF_ENAB_SR
810: out dx,ax ;Reset Set/Reset Enable bits
811:
812: pop edi ;restore dest address
813: pop ebx ;restore line count
814:
815: jmp pfnContinueDrawing ;either keep drawing or we're done
816:
817: ;-----------------------------------------------------------------------;
818: ; Process any left/right columns that have to be done.
819: ;
820: ; Currently:
821: ; EBX = height to fill, in scans
822: ; EDI --> first byte of left edge
823: ;-----------------------------------------------------------------------;
824:
825:
826: ;-----------------------------------------------------------------------;
827: ; Draws whichever of the left and right edges is a partial (non-whole)
828: ; byte, then falls through into check_next_bank.
829: ;-----------------------------------------------------------------------;
830: align 4
831: public edge_byte_setup
832: edge_byte_setup:
833: lea eax,do_edge_wes_trick ;wes_trick calls through pfnWesTrick
834: mov pfnWesTrick,eax
835:
836: mov ecx,ulFgClr
837: xor ecx,ulBkClr ;mask = ulBkClr ^ ulFgClr
838:
839: mov ah,cl ;sre = !mask
840: not ah ;Set/Reset Enable
841: mov edx,EGA_BASE+GRAF_ADDR
842: mov al,GRAF_ENAB_SR
843: out dx,ax ;Set Set/Reset Enable bits
844:
845: mov ah,byte ptr ulBkClr ;Set/Reset = background color
846: mov al,GRAF_SET_RESET
847: out dx,ax
848:
849: mov eax,ulLeftEdgeAdjust ;1 if the left edge is partial, else 0
850: or eax,eax
851: jz short do_right_edge ;left edge is whole: nothing to draw there
852:
853: mov eax,ulMasks ;Get Left/Right edge Masks
854: mov ah,al ;left mask is in the low byte
855: mov al,GRAF_BIT_MASK
856: mov edx,EGA_BASE+GRAF_ADDR
857: out dx,ax ;bit mask = left edge mask
858:
859: inc pulPattern ;Adjust Pattern rotation (point one byte further in)
860:
861: push ebx ;Save line count
862: push edi ;Save Dest Addr
863: call wes_trick
864: pop edi
865: pop ebx
866:
867: mov eax,ulMasks ;restore Left/Right edge Masks (reloaded again just below)
868: dec pulPattern ;Adjust Pattern rotation (undo the increment above)
869:
870: do_right_edge:
871: mov eax,ulMasks ;Get Left/Right edge Masks
872: and ah,0ffh ;is there a right edge mask?
873: jz edge_done ;no, so no right edge to draw
874:
875: mov al,GRAF_BIT_MASK
876: mov edx,EGA_BASE+GRAF_ADDR
877: out dx,ax ;bit mask = right edge mask
878:
879: add edi,ulLeftEdgeAdjust ;point to first whole byte to fill
880: add edi,ulWholeBytes ;point to right edge byte to fill
881: call wes_trick
882:
883: edge_done:
884: mov edx,EGA_BASE+SEQ_DATA ;sequencer data port (presumably still indexed to the plane mask)
885: mov eax,0fh
886: out dx,al ;write 0fh to it
887:
888: mov edx,EGA_BASE+GRAF_ADDR
889: mov eax,GRAF_BIT_MASK+0ff00h
890: out dx,ax ;bit mask back to 0ffh
891:
892: mov eax,GRAF_ENAB_SR
893: out dx,ax ;Reset Set/Reset Enable bits
894:
895:
896: ;-----------------------------------------------------------------------;
897: ; See if there are any more banks to process.
898: ;-----------------------------------------------------------------------;
899:
900: public check_next_bank
901: check_next_bank:
902:
903: mov edi,pdsurf
904: mov eax,[edi].dsurf_rcl1WindowClip.yBottom ;is the fill bottom in
905: cmp ulBottomScan,eax ; the current bank?
906: jle short banks_done ;yes, so we're done
907: ;no, map in the next bank and fill it
908: mov ulCurrentTopScan,eax ;remember where the top of the bank
909: ; we're about to map in is (same as
910: ; bottom of bank we just did)
911:
912: ptrCall <dword ptr [edi].dsurf_pfnBankControl>,<edi,eax,JustifyTop>
913: ;map in the bank
914:
915: ; Compute the starting address and scan line count in this bank.
916:
917: mov eax,pdsurf ;EAX->target surface
918: mov ebx,ulBottomScan ;bottom of destination rectangle
919: cmp ebx,[eax].dsurf_rcl1WindowClip.yBottom
920: ;which comes first, the bottom of the
921: ; dest rect or the bottom of the
922: ; current bank?
923: jl short BottomScanSet2 ;fill bottom comes first, so draw to
924: ; that; this is the last bank in fill
925: mov ebx,[eax].dsurf_rcl1WindowClip.yBottom
926: ;bank bottom comes first; draw to
927: ; bottom of bank
928: BottomScanSet2:
929: mov edi,ulCurrentTopScan ;top scan line to fill in current bank
930: sub ebx,edi ;# of scans to fill in bank
931: imul edi,ulScanWidth ;offset of starting scan line
932:
933: ; Note that the start of the bitmap will change each time through the
934: ; bank loop, because the start of the bitmap is varied to map the
935: ; desired scan line to the banking window.
936:
937: add edi,[eax].dsurf_pvBitmapStart ;start of scan in bitmap
938: add edi,ulRowOffset ;EDI = start offset of fill in bitmap
939:
940: ; We have computed the starting address and scan count. Time to start drawing
941: ; in the newly mapped bank.
942:
943: mov esi,pBrush ;edx = min(PatternHeight,BltHeight)
944: mov ecx,[esi + oem_brush_height]
945: sub ecx,ebx ;ECX = height - scans; borrow if height < scans
946: sbb edx,edx ;EDX = -1 if there was a borrow, else 0
947: and edx,ecx ;EDX = height - scans if height < scans, else 0
948: add edx,ebx ;EDX = height if height < scans, else scans
949: mov ulVbBlindCount,edx ;# of venetian blind passes for this bank
950:
951: ; Brush alignment: pick the pattern row that lines up with the top scan,
952: ; using (top scan - brush origin y) mod 8.
953: mov eax,ulCurrentTopScan ;top scan line to fill in current bank
954: sub eax,ulPatternOrgY ;EAX = top scan - brush origin y
955:
956: jns short pos_y_offset1 ;not negative: just take it mod 8
957: neg eax ;negative: work from the magnitude
958: and eax,7 ;-eax mod 8
959: neg eax ;
960: add eax,8 ;EAX = 8 - (magnitude mod 8), so 1 through 8;
961: jmp short save_pat_pointer1 ; 8 starts at the second pattern copy
962: pos_y_offset1:
963: and eax,7 ;eax mod 8
964: save_pat_pointer1:
965: add eax,eax ;Y Offset * PatternWidth (2 bytes)
966:
967: lea edx,RotatedPat ;start of the rotated pattern copies
968: add eax,edx
969: mov pulPattern,eax ;Drawing code uses this as the
970: ;source for the pattern
971:
972: ; Draw in the new bank.
973:
974: jmp pfnStartDrawing ;whole bytes routine, or edges if no whole bytes
975:
976:
977: ;-----------------------------------------------------------------------;
978: ; Done with all banks in this fill.
979:
980: public banks_done
981: banks_done:
982: PLAIN_RET
983:
984: endProc vMonoPatBlt
985:
986: ;----------------------------------------------------------------------------
987: ; Wes Trick Setup code. This code decides if this is a one or a two pass
988: ; operation, sets the plane mask for each pass and runs pfnWesTrick for it.
989: ;----------------------------------------------------------------------------
990: align 4
991: public wes_trick
992: wes_trick:
993: mov esi,pfnWesTrick ;ESI = routine that draws one pass
994:
995: mov ecx,ulFgClr ;ECX = foreground color
996: mov eax,ecx ;EAX = copy of the foreground color
997: xor ecx,ulBkClr ;mask = ulBkClr ^ ulFgClr
998:
999: mov edx,EGA_BASE+SEQ_DATA ;Index should be pointing to the
1000: ; plane mask (2)
1001: mov ch,cl ;CH = mask
1002: not ch ;Set/Reset Enable bits (CH = ~mask)
1003: and cl,al ;ulFgdColor & mask
1004: or cl,al ;CL = (fg & mask) | fg = low byte of fg; ZF set if it is 0
1005: jz short check_bk_bits ;if zero - one background pass
1006:
1007: mov ulVbMask,0 ;XOR mask of 0: do not invert the
1008: ;pattern for the foreground pass
1009: or ch,cl ;CH = ~mask | CL
1010: mov al,ch
1011: out dx,al ;Enable Planes for First Pass
1012:
1013: push ecx ;Save CL; it is popped into AL below
1014: push edi ;Save our Dest pointer
1015: push ebx ;Save our count
1016: call esi ;Draw the foreground pass
1017: pop ebx ;restore the line count
1018: pop edi ;restore the dest pointer
1019: pop eax ;Restore bk mask (AL = CL pushed above)
1020: mov esi,pfnWesTrick ;reload; ESI is not assumed to survive the call
1021:
1022:
1023: check_bk_bits:
1024: not al ;AL = complement of the value tested/saved above
1025: and al,MM_ALL ;keep only the bits covered by MM_ALL
1026: jnz short @f ;bits remain - a background pass is needed
1027: ret ;nothing left to draw
1028: @@:
1029: mov ulVbMask,-1 ;XOR mask of all ones: invert the pattern for the background pass
1030: mov edx,EGA_BASE+SEQ_DATA ;Index should be pointing to the
1031: ; plane mask (2)
1032: out dx,al ;Enable Planes for the background pass
1033: jmp esi ;tail-jump: the pass routine's RET returns to our caller
1034:
1035: ;--------------------------------------------------------------------------
1036: ; Do the edges here, one pattern row per pass, using the pfnDraw1WideEntry loop.
1037: ;--------------------------------------------------------------------------
1038:
1039: align 4
1040: public do_edge_wes_trick
1041: do_edge_wes_trick:
1042: ; ebx = line count
1043: ; edi = dest
1044: ; eax, ebx, ecx, edx, esi and edi are modified; ulVbBlindCount is preserved
1045: mov ulVbTopScan,ebx ;remember how many lines remain from the current top line
1046: add ebx,ulVbYRound ;Calc the number of lines to do
1047: mov ecx,ulVbyShift ; in this pass:
1048: shr ebx,cl ; (count + ulVbYRound) >> ulVbyShift
1049:
1050: mov esi,pulPattern ;ESI -> first pattern row to use
1051: mov ax,[esi] ;get pattern into place
1052: add esi,2 ;patterns stored as words
1053: xor eax,ulVbMask ;Invert the pattern if we are doing
1054: ; a background pass
1055:
1056: push ulVbBlindCount ;saved because the loop below counts it down to 0
1057: ; Set up variables for entering unrolled loop.
1058: wes_trick_loop:
1059: SET_UP_UNROLL_VARS ebx,edx, ebx,pfnDraw1WideEntry, LOOP_UNROLL_SHIFT
1060:
1061: mov ecx,ulVbNextScan ;offset from one scan to next
1062:
1063: push edi ;save dest pointer
1064: call edx ;jump into the unrolled loop to draw
1065: pop edi ;restore dest pointer
1066:
1067: add edi,ulScanWidth ;move to next scan line
1068:
1069: dec ulVbBlindCount ;one more pattern row finished
1070: jz short wes_trick_loop_done ;jz if we are finished
1071:
1072: mov eax,ulVbTopScan ;restore scan count
1073: dec eax ;Subtract off completed top line
1074: mov ulVbTopScan,eax ;save for next loop
1075: add eax,ulVbYRound ;Calc the number of lines to do
1076: mov ecx,ulVbyShift ;for this venetian blind pass
1077: shr eax,cl ;including any partial patterns
1078: mov ebx,eax ;at the bottom
1079:
1080:
1081: mov ax,[esi] ;get the next pattern row
1082: add esi,2 ;point to the next pattern line
1083: xor eax,ulVbMask ;Invert the pattern if we are doing
1084: ; a background pass
1085:
1086: jmp short wes_trick_loop
1087:
1088: align 4
1089: wes_trick_loop_done:
1090: pop ulVbBlindCount ;restore the count saved on entry
1091: ret
1092:
1093: ;--------------------------------------------------------------------------
1094: ; Do the middle bytes here for blts with rops.
1095: ;--------------------------------------------------------------------------
1096:
1097: align 4
1098: public do_wide_wes_trick
1099: do_wide_wes_trick:
1100: ; ebx = line count
1101: ; edi = dest
1102: ; eax, ebx, ecx, edx, esi and edi are modified; ulVbBlindCount is preserved
1103: mov ulVbTopScan,ebx ;remember how many lines remain from the current top line
1104: add ebx,ulVbYRound ;Calc the number of lines to do
1105: mov ecx,ulVbyShift ; in this pass:
1106: shr ebx,cl ; (count + ulVbYRound) >> ulVbyShift
1107:
1108: mov esi,pulPattern ;ESI -> first pattern row to use
1109: mov al,[esi] ;get pattern into place (only AL is loaded)
1110: add esi,2 ;patterns stored as words
1111: mov pulVbPattern,esi ;keep the row pointer in memory; ESI is reused below
1112: xor eax,ulVbMask ;Invert the pattern if we are doing
1113: ; a background pass
1114:
1115: push ulVbBlindCount ;saved because the loop below counts it down to 0
1116:
1117: mov edx,pfnWholeBytes ;EDX -> entry table used by SET_UP_UNROLL_VARS
1118:
1119: ; Set up variables for entering unrolled loop.
1120: wide_wes_trick_loop:
1121:
1122: SET_UP_UNROLL_VARS ebx,ecx,ebx,[edx], LOOP_UNROLL_SHIFT
1123:
1124: mov esi,ulWholeBytes ;ESI = ulWholeBytes, handed to the drawing loop
1125: mov edx,ulVbNextScan ;offset from one scan to next
1126: sub edx,esi ; minus ulWholeBytes
1127: add edx,ulSpecialBytes ; plus ulSpecialBytes
1128:
1129: push edi ;save dest pointer
1130: call ecx ;jump into the unrolled loop to draw
1131: pop edi ;restore dest pointer
1132:
1133: add edi,ulScanWidth ;move to next scan line
1134:
1135: dec ulVbBlindCount ;one more pattern row finished
1136: jz short wide_wes_trick_loop_done ;jz if we are finished
1137:
1138: mov eax,ulVbTopScan ;restore scan count
1139: dec eax ;Subtract off completed top line
1140: mov ulVbTopScan,eax ;save for next loop
1141: add eax,ulVbYRound ;Calc the number of lines to do
1142: mov ecx,ulVbyShift ;for this venetian blind pass
1143: shr eax,cl ;including any partial patterns
1144: mov ebx,eax ;at the bottom
1145:
1146:
1147: mov esi,pulVbPattern ;ESI -> next pattern row
1148: mov al,[esi] ;get pattern byte (only AL is loaded)
1149: add esi,2 ;point to the next pattern line
1150: mov pulVbPattern,esi ;save the row pointer for the following pass
1151: xor eax,ulVbMask ;Invert the pattern if we are doing
1152: ; a background pass
1153:
1154: mov edx,pfnWholeBytes ;EDX was overwritten above; reload the table pointer
1155: jmp short wide_wes_trick_loop
1156:
1157: align 4
1158: wide_wes_trick_loop_done:
1159: pop ulVbBlindCount ;restore the count saved on entry
1160: ret
1161:
1162: ;-----------------------------------------------------------------------;
1163: ; Unrolled loops.
1164: ;-----------------------------------------------------------------------;
1165:
1166: ;-----------------------------------------------------------------------;
1167: ; Unrolled loop stuff for wide replace-type rops (arbitrary width).
1168: ;-----------------------------------------------------------------------;
1169:
1170: ; Tables of entry points into unrolled wide write-only loops.
1171: UNROLL_LOOP_ENTRY_TABLE pfnDrawWide00Entry,W00,LOOP_UNROLL_COUNT
1172: UNROLL_LOOP_ENTRY_TABLE pfnDrawWide01Entry,W01,LOOP_UNROLL_COUNT
1173: UNROLL_LOOP_ENTRY_TABLE pfnDrawWide10Entry,W10,LOOP_UNROLL_COUNT
1174: UNROLL_LOOP_ENTRY_TABLE pfnDrawWide11Entry,W11,LOOP_UNROLL_COUNT
1175:
1176: ;-----------------------------------------------------------------------;
1177: ; Macro to draw n whole words, 0 leading bytes, 0 trailing bytes, then
1178: ; advance to next scan line.
1179:
1180: DRAW_WIDE_00 macro ENTRY_LABEL,ENTRY_INDEX
1181: &ENTRY_LABEL&ENTRY_INDEX&:
1182: mov ecx,esi ;# of whole words
1183: rep stosw ;store AX to [EDI] ECX times (fill as words)
1184: add edi,edx ;point to the next scan line
1185: endm ;-----------------------------------;
1186:
1187: ; N-wide write-only, 0 leading bytes, 0 trailing bytes.
1188: ; AX = pattern word stored by REP STOSW
1189: ; EBX = loop counter, decremented once per trip through the unrolled body
1190: ; EDX = offset from end of one scan's fill to start of next similar line
1191: ; ESI = # of whole words to fill on each scan
1192: ; EDI = target address to fill
1193: ; EBX, ECX and EDI are modified.
1194: align 4
1195: draw_wide_00_loop proc near
1196: UNROLL_LOOP DRAW_WIDE_00,W00,LOOP_UNROLL_COUNT
1197: dec ebx ;one trip through the unrolled body done
1198: jnz draw_wide_00_loop ;repeat until EBX reaches 0
1199:
1200: ret
1201:
1202: draw_wide_00_loop endp
1203:
1204:
1205: ;-----------------------------------------------------------------------;
1206: ; Macro to draw n whole words, 0 leading bytes, 1 trailing byte, then
1207: ; advance to next scan line.
1208:
1209: DRAW_WIDE_01 macro ENTRY_LABEL,ENTRY_INDEX
1210: &ENTRY_LABEL&ENTRY_INDEX&:
1211: mov ecx,esi ;# of whole words
1212: rep stosw ;store AX to [EDI] ECX times (fill as words)
1213: mov [edi],al ;trailing byte
1214: inc edi ;step past the trailing byte
1215: add edi,edx ;point to the next scan line
1216: endm ;-----------------------------------;
1217:
1218: ; N-wide write-only, 0 leading bytes, 1 trailing byte.
1219: ; AX = pattern word stored by REP STOSW; AL is also the trailing byte
1220: ; EBX = loop counter, decremented once per trip through the unrolled body
1221: ; EDX = offset from end of one scan's fill to start of next similar line
1222: ; ESI = # of whole words to fill on each scan
1223: ; EDI = target address to fill
1224: ; EBX, ECX and EDI are modified.
1225: align 4
1226: draw_wide_01_loop proc near
1227: UNROLL_LOOP DRAW_WIDE_01,W01,LOOP_UNROLL_COUNT
1228: dec ebx ;one trip through the unrolled body done
1229: jnz draw_wide_01_loop ;repeat until EBX reaches 0
1230: ret
1231:
1232: draw_wide_01_loop endp
1233:
1234:
1235: ;-----------------------------------------------------------------------;
1236: ; Macro to draw 1 leading byte, n whole words, 0 trailing bytes, then
1237: ; advance to next scan line.
1238:
1239: DRAW_WIDE_10 macro ENTRY_LABEL,ENTRY_INDEX
1240: &ENTRY_LABEL&ENTRY_INDEX&:
1241: mov [edi],ah ;do leading byte
1242: inc edi ;advance pointer
1243: mov ecx,esi ;# of whole words
1244: rep stosw ;store AX to [EDI] ECX times (fill as words)
1245: add edi,edx ;point to the next scan line
1246: endm ;-----------------------------------;
1247:
1248: ; N-wide write-only, 1 leading byte, 0 trailing bytes.
1249: ; AX = pattern word stored by REP STOSW; AH is also the leading byte
1250: ; EBX = loop counter, decremented once per trip through the unrolled body
1251: ; EDX = offset from end of one scan's fill to start of next similar line
1252: ; ESI = # of whole words to fill on each scan
1253: ; EDI = target address to fill
1254: ; EBX, ECX and EDI are modified.
1255: align 4
1256: draw_wide_10_loop proc near
1257: UNROLL_LOOP DRAW_WIDE_10,W10,LOOP_UNROLL_COUNT
1258: dec ebx ;one trip through the unrolled body done
1259: jnz draw_wide_10_loop ;repeat until EBX reaches 0
1260: ret
1261:
1262: draw_wide_10_loop endp
1263:
1264: ;-----------------------------------------------------------------------;
1265: ; Macro to draw 1 leading byte, n whole words, 1 trailing byte, then
1266: ; advance to next scan line.
1267:
1268: DRAW_WIDE_11 macro ENTRY_LABEL,ENTRY_INDEX
1269: &ENTRY_LABEL&ENTRY_INDEX&:
1270: mov [edi],ah ;do leading byte
1271: inc edi ;advance pointer
1272: mov ecx,esi ;# of whole words
1273: rep stosw ;store AX to [EDI] ECX times (fill as words)
1274: mov [edi],al ;trailing byte
1275: inc edi ;step past the trailing byte
1276: add edi,edx ;point to the next scan line
1277: endm ;-----------------------------------;
1278:
1279: ; N-wide write-only, 1 leading byte, 1 trailing byte.
1280: ; AX = pattern word stored by REP STOSW; AH = leading byte, AL = trailing byte
1281: ; EBX = loop counter, decremented once per trip through the unrolled body
1282: ; EDX = offset from end of one scan's fill to start of next similar line
1283: ; ESI = # of whole words to fill on each scan
1284: ; EDI = target address to fill
1285: ; EBX, ECX and EDI are modified.
1286: align 4
1287: draw_wide_11_loop proc near
1288: UNROLL_LOOP DRAW_WIDE_11,W11,LOOP_UNROLL_COUNT
1289: dec ebx ;one trip through the unrolled body done
1290: jnz draw_wide_11_loop ;repeat until EBX reaches 0
1291: ret
1292:
1293: draw_wide_11_loop endp
1294:
1295: ;-----------------------------------------------------------------------;
1296: ; Unrolled drawing stuff (unrolled to reduce jumps to speed things up),
1297: ; for cases where read before write is NOT required.
1298: ;-----------------------------------------------------------------------;
1299:
1300: ; Tables of entry points into unrolled 1- through 8-wide write-only loops.
1301: ; Note that there may be separate entry tables for various alignments of a
1302: ; specific width, in cases where performance can be improved by using different
1303: ; code for different alignments.
1304:
1305: UNROLL_LOOP_ENTRY_TABLE pfnDraw1WideEvenEntry,W1_EVEN,LOOP_UNROLL_COUNT
1306: UNROLL_LOOP_ENTRY_TABLE pfnDraw1WideOddEntry,W1_ODD,LOOP_UNROLL_COUNT
1307: UNROLL_LOOP_ENTRY_TABLE pfnDraw2WideEvenEntry,W2_EVEN,LOOP_UNROLL_COUNT
1308: UNROLL_LOOP_ENTRY_TABLE pfnDraw2WideOddEntry,W2_ODD,LOOP_UNROLL_COUNT
1309: UNROLL_LOOP_ENTRY_TABLE pfnDraw3WideEvenEntry,W3_EVEN,LOOP_UNROLL_COUNT
1310: UNROLL_LOOP_ENTRY_TABLE pfnDraw3WideOddEntry,W3_ODD,LOOP_UNROLL_COUNT
1311: UNROLL_LOOP_ENTRY_TABLE pfnDraw4WideEvenEntry,W4_EVEN,LOOP_UNROLL_COUNT
1312: UNROLL_LOOP_ENTRY_TABLE pfnDraw4WideOddEntry,W4_ODD,LOOP_UNROLL_COUNT
1313: UNROLL_LOOP_ENTRY_TABLE pfnDraw5WideEvenEntry,W5_EVEN,LOOP_UNROLL_COUNT
1314: UNROLL_LOOP_ENTRY_TABLE pfnDraw5WideOddEntry,W5_ODD,LOOP_UNROLL_COUNT
1315: UNROLL_LOOP_ENTRY_TABLE pfnDraw6WideEvenEntry,W6_EVEN,LOOP_UNROLL_COUNT
1316: UNROLL_LOOP_ENTRY_TABLE pfnDraw6WideOddEntry,W6_ODD,LOOP_UNROLL_COUNT
1317: UNROLL_LOOP_ENTRY_TABLE pfnDraw7WideEvenEntry,W7_EVEN,LOOP_UNROLL_COUNT
1318: UNROLL_LOOP_ENTRY_TABLE pfnDraw7WideOddEntry,W7_ODD,LOOP_UNROLL_COUNT
1319: UNROLL_LOOP_ENTRY_TABLE pfnDraw8WideEvenEntry,W8_EVEN,LOOP_UNROLL_COUNT
1320: UNROLL_LOOP_ENTRY_TABLE pfnDraw8WideOddEntry,W8_ODD,LOOP_UNROLL_COUNT
1321:
1322: ;-----------------------------------------------------------------------;
1323: ; Unrolled 1- through 8-wide write-only edge-drawing loops.
1324: ;
1325: ; Entry:
1326: ; AL/AH/AX = byte(s) written to the destination by the loop
1327: ; EBX = unrolled loop count
1328: ; EDX = offset added to EDI to reach the next scan line
1329: ; EDI = start offset
1330: ;
1331: ; EBX, EDI modified. All other registers preserved.
1332:
1333: ;-----------------------------------------------------------------------;
1334: ; Macro to draw one write-only byte (AL), then advance to next scan line.
1335:
1336: DRAW_1_WIDE_EVEN macro ENTRY_LABEL,ENTRY_INDEX
1337: &ENTRY_LABEL&ENTRY_INDEX&:
1338: mov [edi],al ;store AL with no preceding read (original
1339: ; note: in write mode 3 the byte written
1340: ; acts as the Bit Mask)
1341: add edi,edx ;point to the next scan line
1342: endm ;-----------------------------------;
1343:
1344: ; 1-wide write-only, EVEN variant: the byte comes from AL.
1345:
1346: align 4
1347: draw_1_wide_even_loop proc near
1348: UNROLL_LOOP DRAW_1_WIDE_EVEN,W1_EVEN,LOOP_UNROLL_COUNT
1349: dec ebx ;one trip through the unrolled body done
1350: jnz draw_1_wide_even_loop ;repeat until EBX reaches 0
1351:
1352: ret
1353:
1354: draw_1_wide_even_loop endp
1355:
1356: DRAW_1_WIDE_ODD macro ENTRY_LABEL,ENTRY_INDEX
1357: &ENTRY_LABEL&ENTRY_INDEX&:
1358: mov [edi],ah ;store AH (not AL) with no preceding read
1359: ; (original note: in write mode 3 the byte
1360: ; written acts as the Bit Mask)
1361: add edi,edx ;point to the next scan line
1362: endm ;-----------------------------------;
1363:
1364: ; 1-wide write-only, ODD variant: the byte comes from AH.
1365:
1366: align 4
1367: draw_1_wide_odd_loop proc near
1368: UNROLL_LOOP DRAW_1_WIDE_ODD,W1_ODD,LOOP_UNROLL_COUNT
1369: dec ebx ;one trip through the unrolled body done
1370: jnz draw_1_wide_odd_loop ;repeat until EBX reaches 0
1371:
1372: ret
1373:
1374: draw_1_wide_odd_loop endp
1375:
1376: ;-----------------------------------------------------------------------;
1377: ; Macro to draw two write-only bytes, then advance to next scan line.
1378:
1379: DRAW_2_WIDE_EVEN macro ENTRY_LABEL,ENTRY_INDEX
1380: &ENTRY_LABEL&ENTRY_INDEX&:
1381: mov [edi],ax ;bytes 0-1: AL at [EDI], AH at [EDI+1]
1382: add edi,edx ;point to the next scan line
1383: endm ;-----------------------------------;
1384:
1385: ; 2-wide write-only, EVEN variant: one word store of AX.
1386:
1387: align 4
1388: draw_2_wide_even_loop proc near
1389: UNROLL_LOOP DRAW_2_WIDE_EVEN,W2_EVEN,LOOP_UNROLL_COUNT
1390: dec ebx ;one trip through the unrolled body done
1391: jnz draw_2_wide_even_loop ;repeat until EBX reaches 0
1392:
1393: ret
1394:
1395: draw_2_wide_even_loop endp
1396:
1397: DRAW_2_WIDE_ODD macro ENTRY_LABEL,ENTRY_INDEX
1398: &ENTRY_LABEL&ENTRY_INDEX&:
1399: mov [edi],ah ;byte 0 = AH
1400: mov [edi+1],al ;byte 1 = AL
1401: add edi,edx ;point to the next scan line
1402: endm ;-----------------------------------;
1403:
1404: ; 2-wide write-only, ODD variant: AH first, then AL, as two byte stores.
1405:
1406: align 4
1407: draw_2_wide_odd_loop proc near
1408: UNROLL_LOOP DRAW_2_WIDE_ODD,W2_ODD,LOOP_UNROLL_COUNT
1409: dec ebx ;one trip through the unrolled body done
1410: jnz draw_2_wide_odd_loop ;repeat until EBX reaches 0
1411:
1412: ret
1413:
1414: draw_2_wide_odd_loop endp
1415:
1416: ;-----------------------------------------------------------------------;
1417: ; Macro to draw three write-only bytes, then advance to next scan line.
1418: ; Optimized for even start address.
1419:
1420: DRAW_3_WIDE_EVEN macro ENTRY_LABEL,ENTRY_INDEX
1421: &ENTRY_LABEL&ENTRY_INDEX&:
1422: mov [edi],ax ;bytes 0-1: AL at [EDI], AH at [EDI+1]
1423: mov [edi+2],al ;byte 2 = AL
1424: add edi,edx ;point to the next scan line
1425: endm ;-----------------------------------;
1426:
1427: ; 3-wide write-only, starting at an even address.
1428:
1429: align 4
1430: draw_3_wide_even_loop proc near
1431: UNROLL_LOOP DRAW_3_WIDE_EVEN,W3_EVEN,LOOP_UNROLL_COUNT
1432: dec ebx ;one trip through the unrolled body done
1433: jnz draw_3_wide_even_loop ;repeat until EBX reaches 0
1434:
1435: ret
1436:
1437: draw_3_wide_even_loop endp
1438:
1439: ;-----------------------------------------------------------------------;
1440: ; Macro to draw three write-only bytes, then advance to next scan line.
1441: ; Optimized for odd start address.
1442:
1443: DRAW_3_WIDE_ODD macro ENTRY_LABEL,ENTRY_INDEX
1444: &ENTRY_LABEL&ENTRY_INDEX&:
1445: mov [edi],ah ;byte 0 = AH
1446: mov [edi+1],ax ;bytes 1-2: AL at [EDI+1], AH at [EDI+2]
1447: add edi,edx ;point to the next scan line
1448: endm ;-----------------------------------;
1449:
1450: ; 3-wide write-only, starting at an odd address.
1451:
1452: align 4
1453: draw_3_wide_odd_loop proc near
1454: UNROLL_LOOP DRAW_3_WIDE_ODD,W3_ODD,LOOP_UNROLL_COUNT
1455: dec ebx ;one trip through the unrolled body done
1456: jnz draw_3_wide_odd_loop ;repeat until EBX reaches 0
1457:
1458: ret
1459:
1460: draw_3_wide_odd_loop endp
1461:
1462:
1463: ;-----------------------------------------------------------------------;
1464: ; Macro to draw four write-only bytes, then advance to next scan line.
1465: ; Optimized for even start address.
1466:
1467: DRAW_4_WIDE_EVEN macro ENTRY_LABEL,ENTRY_INDEX
1468: &ENTRY_LABEL&ENTRY_INDEX&:
1469: mov [edi],ax ;bytes 0-1: AL at [EDI], AH at [EDI+1]
1470: mov [edi+2],ax ;bytes 2-3
1471: add edi,edx ;point to the next scan line
1472: endm ;-----------------------------------;
1473:
1474: ; 4-wide write-only, starting at an even address.
1475:
1476: align 4
1477: draw_4_wide_even_loop proc near
1478: UNROLL_LOOP DRAW_4_WIDE_EVEN,W4_EVEN,LOOP_UNROLL_COUNT
1479: dec ebx ;one trip through the unrolled body done
1480: jnz draw_4_wide_even_loop ;repeat until EBX reaches 0
1481:
1482: ret
1483:
1484: draw_4_wide_even_loop endp
1485:
1486: ;-----------------------------------------------------------------------;
1487: ; Macro to draw four write-only bytes, then advance to next scan line.
1488: ; Optimized for odd start address.
1489:
1490: DRAW_4_WIDE_ODD macro ENTRY_LABEL,ENTRY_INDEX
1491: &ENTRY_LABEL&ENTRY_INDEX&:
1492: mov [edi],ah ;byte 0 = AH
1493: mov [edi+1],ax ;bytes 1-2: AL at [EDI+1], AH at [EDI+2]
1494: mov [edi+3],al ;byte 3 = AL
1495: add edi,edx ;point to the next scan line
1496: endm ;-----------------------------------;
1497:
1498: ; 4-wide write-only, starting at an odd address.
1499:
1500: align 4
1501: draw_4_wide_odd_loop proc near
1502: UNROLL_LOOP DRAW_4_WIDE_ODD,W4_ODD,LOOP_UNROLL_COUNT
1503: dec ebx ;one trip through the unrolled body done
1504: jnz draw_4_wide_odd_loop ;repeat until EBX reaches 0
1505:
1506: ret
1507:
1508: draw_4_wide_odd_loop endp
1509:
1510: ;-----------------------------------------------------------------------;
1511: ; Macro to draw five write-only bytes, then advance to next scan line.
1512: ; Optimized for even start address.
1513:
1514: DRAW_5_WIDE_EVEN macro ENTRY_LABEL,ENTRY_INDEX
1515: &ENTRY_LABEL&ENTRY_INDEX&:
1516: mov [edi],ax ;bytes 0-1: AL at [EDI], AH at [EDI+1]
1517: mov [edi+2],ax ;bytes 2-3
1518: mov [edi+4],al ;byte 4 = AL
1519: add edi,edx ;point to the next scan line
1520: endm ;-----------------------------------;
1521:
1522: ; 5-wide write-only, starting at an even address.
1523:
1524: align 4
1525: draw_5_wide_even_loop proc near
1526: UNROLL_LOOP DRAW_5_WIDE_EVEN,W5_EVEN,LOOP_UNROLL_COUNT
1527: dec ebx ;one trip through the unrolled body done
1528: jnz draw_5_wide_even_loop ;repeat until EBX reaches 0
1529:
1530: ret
1531:
1532: draw_5_wide_even_loop endp
1533:
1534: ;-----------------------------------------------------------------------;
1535: ; Macro to draw five write-only bytes, then advance to next scan line.
1536: ; Optimized for odd start address.
1537:
1538: DRAW_5_WIDE_ODD macro ENTRY_LABEL,ENTRY_INDEX
1539: &ENTRY_LABEL&ENTRY_INDEX&:
1540: mov [edi],ah ;byte 0 = AH
1541: mov [edi+1],ax ;bytes 1-2: AL at [EDI+1], AH at [EDI+2]
1542: mov [edi+3],ax ;bytes 3-4
1543: add edi,edx ;point to the next scan line
1544: endm ;-----------------------------------;
1545:
1546: ; 5-wide write-only, starting at an odd address.
1547:
1548: align 4
1549: draw_5_wide_odd_loop proc near
1550: UNROLL_LOOP DRAW_5_WIDE_ODD,W5_ODD,LOOP_UNROLL_COUNT
1551: dec ebx ;one trip through the unrolled body done
1552: jnz draw_5_wide_odd_loop ;repeat until EBX reaches 0
1553:
1554: ret
1555:
1556: draw_5_wide_odd_loop endp
1557:
1558: ;-----------------------------------------------------------------------;
1559: ; Macro to draw six write-only bytes, then advance to next scan line.
1560: ; Optimized for even start address.
1561:
1562: DRAW_6_WIDE_EVEN macro ENTRY_LABEL,ENTRY_INDEX
1563: &ENTRY_LABEL&ENTRY_INDEX&:
1564: mov [edi],ax ;bytes 0-1: AL at [EDI], AH at [EDI+1]
1565: mov [edi+2],ax ;bytes 2-3
1566: mov [edi+4],ax ;bytes 4-5
1567: add edi,edx ;point to the next scan line
1568: endm ;-----------------------------------;
1569:
1570: ; 6-wide write-only, starting at an even address.
1571:
1572: align 4
1573: draw_6_wide_even_loop proc near
1574: UNROLL_LOOP DRAW_6_WIDE_EVEN,W6_EVEN,LOOP_UNROLL_COUNT
1575: dec ebx ;one trip through the unrolled body done
1576: jnz draw_6_wide_even_loop ;repeat until EBX reaches 0
1577:
1578: ret
1579:
1580: draw_6_wide_even_loop endp
1581:
1582: ;-----------------------------------------------------------------------;
1583: ; Macro to draw six write-only bytes, then advance to next scan line.
1584: ; Optimized for odd start address.
1585:
1586: DRAW_6_WIDE_ODD macro ENTRY_LABEL,ENTRY_INDEX
1587: &ENTRY_LABEL&ENTRY_INDEX&:
1588: mov [edi],ah ;byte 0 = AH
1589: mov [edi+1],ax ;bytes 1-2: AL at [EDI+1], AH at [EDI+2]
1590: mov [edi+3],ax ;bytes 3-4
1591: mov [edi+5],al ;byte 5 = AL
1592: add edi,edx ;point to the next scan line
1593: endm ;-----------------------------------;
1594:
1595: ; 6-wide write-only, starting at an odd address.
1596:
1597: align 4
1598: draw_6_wide_odd_loop proc near
1599: UNROLL_LOOP DRAW_6_WIDE_ODD,W6_ODD,LOOP_UNROLL_COUNT
1600: dec ebx ;one trip through the unrolled body done
1601: jnz draw_6_wide_odd_loop ;repeat until EBX reaches 0
1602:
1603: ret
1604:
1605: draw_6_wide_odd_loop endp
1606:
1607: ;-----------------------------------------------------------------------;
1608: ; Macro to draw seven write-only bytes, then advance to next scan line.
1609: ; Optimized for even start address.
1610:
1611: DRAW_7_WIDE_EVEN macro ENTRY_LABEL,ENTRY_INDEX
1612: &ENTRY_LABEL&ENTRY_INDEX&:
1613: mov [edi],ax ;bytes 0-1: AL at [EDI], AH at [EDI+1]
1614: mov [edi+2],ax ;bytes 2-3
1615: mov [edi+4],ax ;bytes 4-5
1616: mov [edi+6],al ;byte 6 = AL
1617: add edi,edx ;point to the next scan line
1618: endm ;-----------------------------------;
1619:
1620: ; 7-wide write-only, starting at an even address.
1621:
1622: align 4
1623: draw_7_wide_even_loop proc near
1624: UNROLL_LOOP DRAW_7_WIDE_EVEN,W7_EVEN,LOOP_UNROLL_COUNT
1625: dec ebx ;one trip through the unrolled body done
1626: jnz draw_7_wide_even_loop ;repeat until EBX reaches 0
1627:
1628: ret
1629:
1630: draw_7_wide_even_loop endp
1631:
1632: ;-----------------------------------------------------------------------;
1633: ; Macro to draw seven write-only bytes, then advance to next scan line.
1634: ; Optimized for odd start address.
1635:
1636: DRAW_7_WIDE_ODD macro ENTRY_LABEL,ENTRY_INDEX
1637: &ENTRY_LABEL&ENTRY_INDEX&:
1638: mov [edi],ah ;byte 0 = AH
1639: mov [edi+1],ax ;bytes 1-2: AL at [EDI+1], AH at [EDI+2]
1640: mov [edi+3],ax ;bytes 3-4
1641: mov [edi+5],ax ;bytes 5-6
1642: add edi,edx ;point to the next scan line
1643: endm ;-----------------------------------;
1644:
1645: ; 7-wide write-only, starting at an odd address.
1646:
1647: align 4
1648: draw_7_wide_odd_loop proc near
1649: UNROLL_LOOP DRAW_7_WIDE_ODD,W7_ODD,LOOP_UNROLL_COUNT
1650: dec ebx ;one trip through the unrolled body done
1651: jnz draw_7_wide_odd_loop ;repeat until EBX reaches 0
1652:
1653: ret
1654:
1655: draw_7_wide_odd_loop endp
1656:
1657: ;-----------------------------------------------------------------------;
1658: ; Macro to draw eight write-only bytes, then advance to next scan line.
1659: ; Optimized for even start address.
1660:
1661: DRAW_8_WIDE_EVEN macro ENTRY_LABEL,ENTRY_INDEX
1662: &ENTRY_LABEL&ENTRY_INDEX&:
1663: mov [edi],ax ;bytes 0-1: AL at [EDI], AH at [EDI+1]
1664: mov [edi+2],ax ;bytes 2-3
1665: mov [edi+4],ax ;bytes 4-5
1666: mov [edi+6],ax ;bytes 6-7
1667: add edi,edx ;point to the next scan line
1668: endm ;-----------------------------------;
1669:
1670: ; 8-wide write-only, starting at an even address.
1671:
1672: align 4
1673: draw_8_wide_even_loop proc near
1674: UNROLL_LOOP DRAW_8_WIDE_EVEN,W8_EVEN,LOOP_UNROLL_COUNT
1675: dec ebx ;one trip through the unrolled body done
1676: jnz draw_8_wide_even_loop ;repeat until EBX reaches 0
1677:
1678: ret
1679:
1680: draw_8_wide_even_loop endp
1681:
1682: ;-----------------------------------------------------------------------;
1683: ; Macro to draw eight write-only bytes, then advance to next scan line.
1684: ; Optimized for odd start address.
1685:
1686: DRAW_8_WIDE_ODD macro ENTRY_LABEL,ENTRY_INDEX
1687: &ENTRY_LABEL&ENTRY_INDEX&:
1688: mov [edi],ah ;byte 0 = AH
1689: mov [edi+1],ax ;bytes 1-2: AL at [EDI+1], AH at [EDI+2]
1690: mov [edi+3],ax ;bytes 3-4
1691: mov [edi+5],ax ;bytes 5-6
1692: mov [edi+7],al ;byte 7 = AL
1693: add edi,edx ;point to the next scan line
1694: endm ;-----------------------------------;
1695:
1696: ; 8-wide write-only, starting at an odd address.
1697:
1698: align 4
1699: draw_8_wide_odd_loop proc near
1700: UNROLL_LOOP DRAW_8_WIDE_ODD,W8_ODD,LOOP_UNROLL_COUNT
1701: dec ebx ;one trip through the unrolled body done
1702: jnz draw_8_wide_odd_loop ;repeat until EBX reaches 0
1703:
1704: ret
1705:
1706: draw_8_wide_odd_loop endp
1707:
1708: ;-----------------------------------------------------------------------;
1709: ; Unrolled 1-wide read before write drawing loop (entry table RW1).
1710: ;
1711: ; Entry:
1712: ; AL = byte written to the destination
1713: ; EBX = unrolled loop count
1714: ; ECX = offset added to EDI after each byte is drawn
1715: ; EDI = start offset
1716: ;
1717: ; EBX, EDI and DH modified. All other registers preserved.
1718:
1719: UNROLL_LOOP_ENTRY_TABLE pfnDraw1WideEntry,RW1,LOOP_UNROLL_COUNT
1720:
1721: ;-----------------------------------------------------------------------;
1722: ; Macro to draw one read before write byte, then advance by ECX.
1723:
1724: DRAW_1_WIDE macro ENTRY_LABEL,ENTRY_INDEX
1725: &ENTRY_LABEL&ENTRY_INDEX&:
1726: mov dh,[edi] ;load latches w/o destroying our data
1727: mov [edi],al ;write out our byte
1728: add edi,ecx ;move to the next blind
1729: endm ;-----------------------------------;
1730:
1731: ; 1-wide read/write. Unlike the other loops here, the stride is in ECX, not EDX.
1732:
1733: align 4
1734: draw_1_wide_loop proc near
1735: UNROLL_LOOP DRAW_1_WIDE,RW1,LOOP_UNROLL_COUNT
1736: dec ebx ;one trip through the unrolled body done
1737: jnz draw_1_wide_loop ;repeat until EBX reaches 0
1738:
1739: ret
1740:
1741: draw_1_wide_loop endp
1742:
1743: ; Tables of entry points into unrolled 1-, 2-, 3-, and 4-wide, and 5-or-wider
1744: ; read before write loops.
1745:
1746: UNROLL_LOOP_ENTRY_TABLE pfnDraw1RWEntry,RWW1,LOOP_UNROLL_COUNT
1747: UNROLL_LOOP_ENTRY_TABLE pfnDraw2RWEntry,RWW2,LOOP_UNROLL_COUNT
1748: UNROLL_LOOP_ENTRY_TABLE pfnDraw3RWEntry,RWW3,LOOP_UNROLL_COUNT
1749: UNROLL_LOOP_ENTRY_TABLE pfnDraw4RWEntry,RWW4,LOOP_UNROLL_COUNT
1750: UNROLL_LOOP_ENTRY_TABLE pfnDrawRWWideEntry,RWWIDE,LOOP_UNROLL_COUNT
1751:
1752:
1753: ;-----------------------------------------------------------------------;
1754: ; Unrolled 1-, 2-, 3-, and 4-wide read before write drawing loops.
1755: ;
1756: ; Entry:
1757: ; AL = byte written to each destination byte
1758: ; EBX = unrolled loop count
1759: ; EDX = offset added to EDI to reach the next scan line
1760: ; EDI = start offset
1761: ;
1762: ; EBX, EDI and AH modified. All other registers preserved.
1763:
1764: ;-----------------------------------------------------------------------;
1765: ; Macro to draw one read before write byte, then advance to next scan line.
1766:
1767: DRAW_1_WIDE_RW macro ENTRY_LABEL,ENTRY_INDEX
1768: &ENTRY_LABEL&ENTRY_INDEX&:
1769: mov ah,[edi] ;read the byte first; the value in AH is not used
1770: mov [edi],al ;then write AL to the same address
1771: add edi,edx ;point to the next scan line
1772: endm ;-----------------------------------;
1773:
1774: ; 1-wide read/write. AH is clobbered by the read.
1775:
1776: align 4
1777: draw_1_wide_rop_loop proc near
1778: UNROLL_LOOP DRAW_1_WIDE_RW,RWW1,LOOP_UNROLL_COUNT
1779: dec ebx ;one trip through the unrolled body done
1780: jnz draw_1_wide_rop_loop ;repeat until EBX reaches 0
1781:
1782: ret
1783:
1784: draw_1_wide_rop_loop endp
1785:
1786: ;-----------------------------------------------------------------------;
1787: ; Macro to draw two read before write bytes, then advance to next scan line.
1788:
1789: DRAW_2_WIDE_RW macro ENTRY_LABEL,ENTRY_INDEX
1790: &ENTRY_LABEL&ENTRY_INDEX&:
1791: mov ah,[edi] ;byte 0: read first (value in AH is not used)
1792: mov [edi],al ; then write AL
1793: mov ah,[edi+1] ;byte 1: read first
1794: mov [edi+1],al ; then write AL
1795: add edi,edx ;point to the next scan line
1796: endm ;-----------------------------------;
1797:
1798: ; 2-wide read/write. AH is clobbered by the reads.
1799:
1800: align 4
1801: draw_2_wide_rop_loop proc near
1802: UNROLL_LOOP DRAW_2_WIDE_RW,RWW2,LOOP_UNROLL_COUNT
1803: dec ebx ;one trip through the unrolled body done
1804: jnz draw_2_wide_rop_loop ;repeat until EBX reaches 0
1805:
1806: ret
1807:
1808: draw_2_wide_rop_loop endp
1809:
1810: ;-----------------------------------------------------------------------;
1811: ; Macro to draw three read before write bytes, then advance to next scan line.
1812:
1813: DRAW_3_WIDE_RW macro ENTRY_LABEL,ENTRY_INDEX
1814: &ENTRY_LABEL&ENTRY_INDEX&:
1815: mov ah,[edi] ;byte 0: read first (value in AH is not used)
1816: mov [edi],al ; then write AL
1817: mov ah,[edi+1] ;byte 1: read first
1818: mov [edi+1],al ; then write AL
1819: mov ah,[edi+2] ;byte 2: read first
1820: mov [edi+2],al ; then write AL
1821: add edi,edx ;point to the next scan line
1822: endm ;-----------------------------------;
1823:
1824: ; 3-wide read/write. AH is clobbered by the reads.
1825:
1826: align 4
1827: draw_3_wide_rop_loop proc near
1828: UNROLL_LOOP DRAW_3_WIDE_RW,RWW3,LOOP_UNROLL_COUNT
1829: dec ebx ;one trip through the unrolled body done
1830: jnz draw_3_wide_rop_loop ;repeat until EBX reaches 0
1831:
1832: ret
1833:
1834: draw_3_wide_rop_loop endp
1835:
1836: ;-----------------------------------------------------------------------;
1837: ; Macro to draw four read before write bytes, then advance to next scan line.
1838:
1839: DRAW_4_WIDE_RW macro ENTRY_LABEL,ENTRY_INDEX
1840: &ENTRY_LABEL&ENTRY_INDEX&:
1841: mov ah,[edi] ;byte 0: read first (value in AH is not used)
1842: mov [edi],al ; then write AL
1843: mov ah,[edi+1] ;byte 1: read first
1844: mov [edi+1],al ; then write AL
1845: mov ah,[edi+2] ;byte 2: read first
1846: mov [edi+2],al ; then write AL
1847: mov ah,[edi+3] ;byte 3: read first
1848: mov [edi+3],al ; then write AL
1849: add edi,edx ;point to the next scan line
1850: endm ;-----------------------------------;
1851:
1852: ; 4-wide read/write. AH is clobbered by the reads.
1853:
1854: align 4
1855: draw_4_wide_rop_loop proc near
1856: UNROLL_LOOP DRAW_4_WIDE_RW,RWW4,LOOP_UNROLL_COUNT
1857: dec ebx ;one trip through the unrolled body done
1858: jnz draw_4_wide_rop_loop ;repeat until EBX reaches 0
1859:
1860: ret
1861:
1862: draw_4_wide_rop_loop endp
1863:
1864: ;-----------------------------------------------------------------------;
1865: ; Unrolled 5-or-wider read before write loop.
1866: ;
1867: ; Entry:
1868: ; AL = byte written to each destination byte
1869: ; ESI = # of bytes to fill across scan line (must be nonzero)
1870: ; EBX = unrolled loop count
1871: ; EDX = offset from end of one scan line to the start of the next
1872: ; EDI = start offset
1873: ;
1874: ; EBX, ECX, EDI and AH modified. All other registers preserved.
1875:
1876: ;-----------------------------------------------------------------------;
1877: ; Macro to draw five or more read before write bytes, then advance to
1878: ; next scan line. (Handles any NONZERO byte count; a count of 0 is not safe
1879: ; because DEC ECX/JNZ would wrap. Narrow cases have special-case handlers.)
1880: ; Original note: reads of display memory return 0ffh, which then becomes the
1881: ; Bit Mask as it's written in write mode 3. (Here the value read is discarded.)
1882:
1883: DRAW_WIDE_RW macro ENTRY_LABEL,ENTRY_INDEX
1884: &ENTRY_LABEL&ENTRY_INDEX&:
1885: mov ecx,esi ;ECX = bytes to draw on this scan
1886: @@: mov ah,[edi] ;read the byte first; the value in AH is not used
1887: mov [edi],al ;then write AL to the same address
1888: inc edi ;next byte
1889: dec ecx
1890: jnz @b ;loop until ECX bytes have been drawn
1891: add edi,edx ;point to the next scan line
1892: endm ;-----------------------------------;
1893:
1894: ; 5-or-wider read/write.
1895:
1896: align 4
1897: draw_wide_rop_loop proc near
1898: UNROLL_LOOP DRAW_WIDE_RW,RWWIDE,LOOP_UNROLL_COUNT
1899: dec ebx ;one trip through the unrolled body done
1900: jnz draw_wide_rop_loop ;repeat until EBX reaches 0
1901:
1902: ret
1903:
1904: draw_wide_rop_loop endp
1905:
1906: _TEXT$01 ends
1907:
1908: end
1909:
1910: ;masm386 -Mx -I..\..\inc -I..\..\..\inc -Id:\nt\public\sdk\inc -Di386=1 -DNT_INST=0 -DNT_UP=1 -DSTD_CALL -DDBG=1 -DDEVL=1 i386\patblt.asm,obj\i386\patblt.obj,x.lst;
1911:
1912:
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.