|
|
;-----------------------------------------------------------------------;

; LOOP_UNROLL_SHIFT is log2 of the unroll factor used by the loops in
; this module (e.g. 3 means 2**3 = 8 copies of the loop body). Changing
; this one equate is all that is needed to change the unrolling.

LOOP_UNROLL_SHIFT equ 2

;-----------------------------------------------------------------------;

; SLOW_OUT: word OUT of AX to port DX, preceded by a PUSH/POP pair whose
; only purpose is to waste a little time. The delay works around a bug
; in the ATI Ultra when it runs in VGA mode.
; Clobbers nothing (ECX is pushed and immediately restored).

SLOW_OUT macro
        push    ecx                     ;delay only
        pop     ecx
        out     dx,ax
        endm

        .386

; Calling convention: C everywhere except DOS_PLATFORM builds that do
; not define STD_CALL, which use pascal.

ifdef DOS_PLATFORM
ifdef STD_CALL
        .model  small,c
else
        .model  small,pascal
endif; STD_CALL
else
        .model  small,c
endif; DOS_PLATFORM

        assume cs:FLAT,ds:FLAT,es:FLAT,ss:FLAT
        assume fs:nothing,gs:nothing

        .xlist
        include stdcall.inc             ;calling convention cmacros
        include i386\egavga.inc
        include i386\strucs.inc
        include i386\unroll.inc
        include i386\ropdefs.inc
        include i386\display.inc        ; Display specific structures

        .list

;-----------------------------------------------------------------------;

        .data

;
; Tables shared with vgablts.asm
;

        extrn   jALUFuncTable   :byte
        extrn   jLeftMask       :byte
        extrn   jRightMask      :byte
        extrn   jForceOnTable   :byte
        extrn   jNotTable       :byte
        extrn   jInvertDest     :byte
        extrn   jForceOffTable  :byte
        extrn   vTrgBlt@20      :dword

;-----------------------------------------------------------------------;
; Edge-drawing dispatch table. The index is built in vMonoPatBlt:
;   bit 0 = 1 when the left edge byte is partial,
;   bit 1 = 1 when the right edge byte is whole.
; Only index 2 (left whole, right whole) has no edge work to do and goes
; straight on to the next bank.

        align   4
        public  pfnEdgeDrawing
pfnEdgeDrawing  label   dword
        dd      edge_byte_setup, edge_byte_setup
        dd      check_next_bank, edge_byte_setup

;-----------------------------------------------------------------------;
; Entry-point tables for the unrolled wide whole-byte replace code,
; indexed by (odd leading byte present)*2 + (odd trailing byte present).

        align   4
        public  pfnWideWholeRep
pfnWideWholeRep label   dword
        dd      pfnDrawWide00Entry, pfnDrawWide01Entry
        dd      pfnDrawWide10Entry, pfnDrawWide11Entry

;-----------------------------------------------------------------------;
; Entry-point tables for the narrow, special-cased unrolled replace
; whole-byte code, indexed by width in bytes.
;
; Note: the width at which to stop special-casing and switch to REP STOS
; was a guess by the original author; 8 seemed reasonable.

; Whole bytes start at an even address.
        align   4
        public  pfnWholeBytesMod0Entries
pfnWholeBytesMod0Entries label dword
        dd      0                       ;a 0-wide fill never gets here
        dd      pfnDraw1WideEvenEntry, pfnDraw2WideEvenEntry
        dd      pfnDraw3WideEvenEntry, pfnDraw4WideEvenEntry
        dd      pfnDraw5WideEvenEntry, pfnDraw6WideEvenEntry
        dd      pfnDraw7WideEvenEntry, pfnDraw8WideEvenEntry
MAX_REPLACE_SPECIAL equ ($-pfnWholeBytesMod0Entries)/4

; Whole bytes start at an odd address.
        align   4
        public  pfnWholeBytesMod1Entries
pfnWholeBytesMod1Entries label dword
        dd      0                       ;a 0-wide fill never gets here
        dd      pfnDraw1WideOddEntry, pfnDraw2WideOddEntry
        dd      pfnDraw3WideOddEntry, pfnDraw4WideOddEntry
        dd      pfnDraw5WideOddEntry, pfnDraw6WideOddEntry
        dd      pfnDraw7WideOddEntry, pfnDraw8WideOddEntry


;-----------------------------------------------------------------------;
; Table of pointers to tables used to find entry points in narrow, special-
; cased unrolled non-replace whole byte code.

127: ; Note: The breakpoint where one should switch from special-casing to ! 128: ; REP MOVSB is purely a guess on my part. 5 seemed reasonable. ! 129: ! 130: align 4 ! 131: pfnWholeBytesNonReplace label dword ! 132: dd 0 ;we never get a 0-wide case ! 133: dd pfnDraw1RWEntry ! 134: dd pfnDraw2RWEntry ! 135: dd pfnDraw3RWEntry ! 136: dd pfnDraw4RWEntry ! 137: MAX_NON_REPLACE_SPECIAL equ ($-pfnWholeBytesNonReplace)/4 ! 138: ! 139: ; Master MOD 2 alignment look-up table for entry tables for two possible ! 140: ; alignments for narrow, special-cased unrolled replace whole byte code. ! 141: align 4 ! 142: public pfnWholeBytesSpecial ! 143: pfnWholeBytesSpecial label dword ! 144: dd pfnWholeBytesMod0Entries ! 145: dd pfnWholeBytesMod1Entries ! 146: ! 147: .code ! 148: ! 149: ;============================================================================= ! 150: ! 151: _TEXT$01 SEGMENT DWORD USE32 PUBLIC 'CODE' ! 152: ASSUME CS:FLAT, DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING ! 153: ! 154: cProc vMonoPatBlt,24,< \ ! 155: uses esi edi ebx, \ ! 156: pdsurf: ptr DEVSURF, \ ! 157: culRcl: dword, \ ! 158: prcl: ptr RECTL, \ ! 159: ulMix: dword, \ ! 160: pBrush: ptr oem_brush_def, \ ! 161: pBrushOrg: ptr POINTL > ! 162: ! 163: local ulRowOffset :dword ;Offset from start of scan line ! 164: ; first byte to fill ! 165: local ulWholeBytes :dword ;# of whole bytes to fill ! 166: local ulWholeWords :dword ;# of whole words to fill excluding ! 167: ;leading and/or trailing bytes ! 168: local pfnWholeFn :dword ;pointer to routine used to draw ! 169: ; whole bytes ! 170: local ulScanWidth :dword ;offset from start of one scan to start ! 171: ; of next ! 172: local ulNextScan :dword ;offset from end of one scan line's ! 173: ; fill to start of next ! 174: local ulCurrentTopScan :dword ;top scan line to fill in current bank ! 175: local ulMasks :dword ;low byte = right mask, high byte = ! 176: ; left mask ! 177: local ulBottomScan :dword ;bottom scan line of fill rectangle ! 
178: ! 179: local jALUFunc :dword ;VGA ALU logical operation (SET, AND, ! 180: ; OR, or XOR) ! 181: local pfnStartDrawing :dword ;pointer to function to call to start ! 182: ; drawing ! 183: local pfnContinueDrawing :dword ;pointer to function to call to ! 184: ; continue drawing after doing whole ! 185: ; bytes ! 186: local ulLeftEdgeAdjust :dword ;used to bump the whole bytes start ! 187: ; address past the left edge when the ! 188: ; left edge is partial ! 189: local pfnWholeBytes :dword ;pointer to table of entry points ! 190: ; into unrolled loops for whole byte ! 191: ; filling ! 192: local ulSpecialBytes ;If we are doing a special case wide ! 193: ; fill, this will be the width of the ! 194: ; fill. We need this so we can properly ! 195: ; increment to the next line. ! 196: local ulVbNextScan :dword ;Offset from the end of the current ! 197: ; wide fill drawing operation to the ! 198: ; top of the next venetian blind line ! 199: local fdInvertDestFirst :dword;1 if the rop requires a pass to invert ! 200: ; the destination before the normal ! 201: ; pass ! 202: ! 203: local ulPatternOrgY: dword ;Local copy of the pattern offset Y ! 204: ! 205: local ulVbBlindCount :dword ;Temp Height of pattern. ! 206: ! 207: local ulVbTopScan :dword ;slats in our blinds ! 208: ! 209: local ulVbStartScan :dword ;Current to slat ! 210: ! 211: local pUlVbPattern:dword ;inner loop pattern pointer ! 212: ! 213: local pUlPattern:dword ;current pattern with proper Y offset ! 214: ! 215: local ulVbMask ;Inversion mask for partial edges ! 216: ! 217: local ulVbYRound ; ! 218: ! 219: local ulVbYShift ; ! 220: ! 221: local RotatedPat[32]:byte ;Aligned pattern buffer ! 222: ! 223: local ulFgClr:dword ;Local copy of the foreground color ! 224: ! 225: local ulBkClr:dword ;Local copy of the background color ! 226: ! 227: local pfnWesTrick:dword ;Pointer to the desired inner loop ! 228: ; wes trick code. While we are doing ! 229: ; a ROP to full bytes, this will point ! 
230: ; to do_wide_wes_trick otherwise it ! 231: ; will point to do_edge_wes_trick for ! 232: ; the edge cases ! 233: cld ! 234: ! 235: ;-----------------------------------------------------------------------; ! 236: ; Make sure there's something to draw; clip enumerations can be empty. ! 237: ;-----------------------------------------------------------------------; ! 238: ! 239: cmp culRcl,0 ;any rects to fill? ! 240: jz vMonoPatBlts_done ;no, we're done ! 241: ! 242: mov esi,pBrush ;point to the brush ! 243: ! 244: xor eax,eax ! 245: mov al,[esi + oem_brush_fg] ! 246: mov ulFgClr,eax ;Make local copy of the fg color ! 247: ! 248: mov al,[esi + oem_brush_bg] ! 249: mov ulBkClr,eax ;Make local copy of the bk color ! 250: ! 251: ;-----------------------------------------------------------------------; ! 252: ; Set up for the desired raster op. ! 253: ;-----------------------------------------------------------------------; ! 254: sub ebx,ebx ;ignore any background mix; we're only ! 255: mov bl,byte ptr ulMix ; concerned with the foreground in this ! 256: ; module ! 257: cmp ebx,R2_NOP ;is this NOP? ! 258: jz vMonoPatBlts_done ;yes, we're done ! 259: sub eax,eax ;we want a dword ! 260: mov al,jInvertDest[ebx] ;remember whether we need to invert the ! 261: mov fdInvertDestFirst,eax ; destination before finishing the rop ! 262: mov eax,ulFgClr ! 263: and al,jForceOffTable[ebx] ;force color to 0 if necessary ! 264: ; (R2_BLACK) ! 265: or al,jForceOnTable[ebx] ;force color to 0ffh if necessary ! 266: ; (R2_WHITE, R2_NOT) ! 267: xor al,jNotTable[ebx] ;invert color if necessary (any Pn mix) ! 268: ;at this point, CH has the color we ! 269: ; want to draw with; set up the VGA ! 270: ; hardware to draw with that color ! 271: mov ulFgClr,eax ! 272: ! 273: mov eax,ulBkClr ! 274: and al,jForceOffTable[ebx] ;force color to 0 if necessary ! 275: ; (R2_BLACK) ! 276: or al,jForceOnTable[ebx] ;force color to 0ffh if necessary ! 277: ; (R2_WHITE, R2_NOT) ! 
278: xor al,jNotTable[ebx] ;invert color if necessary (any Pn mix) ! 279: ;at this point, CH has the color we ! 280: ; want to draw with; set up the VGA ! 281: ; hardware to draw with that color ! 282: mov ulBkClr,eax ! 283: ! 284: mov ah,jALUFuncTable[ebx] ;get the ALU logical function ! 285: and ah,ah ;is the logical function DR_SET? ! 286: .errnz DR_SET ! 287: jz short skip_ALU_set ;yes, don't have to set because that's ! 288: ; the VGA's default state ! 289: mov edx,VGA_BASE + GRAF_ADDR ! 290: mov al,GRAF_DATA_ROT ! 291: SLOW_OUT ;set the ALU logical function ! 292: skip_ALU_set: ! 293: mov byte ptr jALUFunc,ah ;remember the ALU logical function ! 294: ! 295: ;-----------------------------------------------------------------------; ! 296: ; Set up variables that are constant for the entire time we're in this ! 297: ; module. ! 298: ;-----------------------------------------------------------------------; ! 299: mov edx,pBrushOrg ;point to the brush origin ! 300: ! 301: mov ecx,[edx].ptl_x ! 302: and ecx,15 ;eax mod 16 ! 303: ! 304: mov eax,[edx].ptl_y ! 305: mov ulPatternOrgY,eax ! 306: ! 307: ;We are now going to make a copy of our rotated copy of our pattern. ! 308: ;The reason that we do this is because we may be called with several ! 309: ;rectangles and we don't really want to rotate the pattern data for ! 310: ;each rectangle. We copy this rectangle to be double high so that ! 311: ;we can incorperate our Y offest later without having to worry ! 312: ;about running off the end of the pattern. ! 313: ! 314: lea edi,RotatedPat ;Pattern Dest ! 315: mov esi,[esi + oem_brush_pmono] ;Pattern Src ! 316: or cl,cl ! 317: jnz rotate_and_expand ! 318: ! 319: INDEX=0 ! 320: rept 4 ;patterns are 16x8 ! 321: mov eax,[esi+INDEX] ! 322: mov [edi+INDEX],eax ! 323: mov [edi+16+INDEX],eax ! 324: INDEX=INDEX+4 ! 325: endm ;----------------- ! 326: jmp fill_rect_loop ! 327: ! 328: rotate_and_expand: ! 329: INDEX=0 ! 330: rept 8 ;patterns are 16x8 ! 
331: mov ah,[esi+INDEX] ;load bytes for shift ! 332: mov al,[esi+1+INDEX] ;convert from little to big endian ! 333: ror ax,cl ;shift into position ! 334: mov [edi+INDEX],ah ;save result ! 335: mov [edi+1+INDEX],al ! 336: mov [edi+16+INDEX],ah ;save result to second copy ! 337: mov [edi+17+INDEX],al ! 338: INDEX=INDEX+2 ! 339: endm ;----------------- ! 340: ! 341: ! 342: fill_rect_loop: ! 343: ;-----------------------------------------------------------------------; ! 344: ; Set up masks and widths. ! 345: ;-----------------------------------------------------------------------; ! 346: mov edi,prcl ;point to rectangle to fill ! 347: ! 348: sub eax,eax ! 349: mov ulLeftEdgeAdjust,eax ;initalize variable ! 350: mov ulSpecialBytes,eax ;initalize variable ! 351: ! 352: mov eax,[edi].yBottom ! 353: mov ulBottomScan,eax ;remember the bottom scan line of fill ! 354: ! 355: mov ebx,[edi].xRight ;right edge of fill (non-inclusive) ! 356: mov ecx,ebx ! 357: and ecx,0111b ;intrabyte address of right edge ! 358: mov ah,jRightMask[ecx] ;right edge mask ! 359: ! 360: mov esi,[edi].xLeft ;left edge of fill (inclusive) ! 361: mov ecx,esi ! 362: shr ecx,3 ;/8 for start offset from left edge ! 363: ; of scan line ! 364: mov ulRowOffset,ecx ;remember offset from start of scan ! 365: ; line ! 366: sub ebx,esi ;width in pixels of fill ! 367: ! 368: and esi,0111b ;intrabyte address of left edge ! 369: mov al,jLeftMask[esi] ;left edge mask ! 370: ! 371: dec ebx ;make inclusive on right ! 372: add ebx,esi ;inclusive width, starting counting at ! 373: ; the beginning of the left edge byte ! 374: shr ebx,3 ;width of fill in bytes touched - 1 ! 375: jnz short more_than_1_byte ;more than 1 byte is involved ! 376: ! 377: ; Only one byte will be affected. Combine first/last masks. ! 378: ! 379: and al,ah ;we'll use first byte mask only ! 380: xor ah,ah ;want last byte mask to be 0 ! 381: inc ebx ;so there's one count to subtract below ! 382: ; if this isn't a whole edge byte ! 
383: more_than_1_byte: ! 384: ! 385: ; If all pixels in the left edge are altered, combine the first byte into the ! 386: ; whole byte count and clear the first byte mask, because we can handle solid ! 387: ; edge bytes faster as part of the whole bytes. Ditto for the right edge. ! 388: ! 389: sub ecx,ecx ;edge whole-status accumulator ! 390: cmp al,-1 ;is left edge a whole byte or partial? ! 391: adc ecx,ecx ;ECX=1 if left edge partial, 0 if whole ! 392: sub ebx,ecx ;if left edge partial, deduct it from ! 393: ; the whole bytes count ! 394: mov ulLeftEdgeAdjust,ecx ;for skipping over the left edge if ! 395: ; it's partial when pointing to the ! 396: ; whole bytes ! 397: and ah,ah ;is right edge mask 0, meaning this ! 398: ; fill is only 1 byte wide? ! 399: jz short save_masks ;yes, no need to do anything ! 400: cmp ah,-1 ;is right edge a whole byte or partial? ! 401: jnz short save_masks ;partial ! 402: mov ah,0 ; ! 403: add ecx,2 ;bit 1 of ECX=0 if right edge partial, ! 404: ; 1 if whole; ! 405: ;bit 1=0 if left edge partial, 1 whole ! 406: inc ebx ;if right edge whole, include it in the ! 407: ; whole bytes count ! 408: save_masks: ! 409: mov ulMasks,eax ;save left and right clip masks ! 410: mov ulWholeBytes,ebx ;save # of whole bytes ! 411: ! 412: mov ecx,pfnEdgeDrawing[ecx*4] ;set address of routine to draw ! 413: mov pfnContinueDrawing,ecx ; all partial (non-whole) edges ! 414: ! 415: and ebx,ebx ;any whole bytes? ! 416: jz start_vec_set ;no ! 417: ;yes, so draw the whole bytes before ! 418: ; the edge bytes ! 419: ! 420: ; The whole bytes loop depends on the type of operation being done. If the ! 421: ; operation is one which uses DR_SET, then we can use a STOS-type operation, ! 422: ; else we have to use a MOVSB-type operation (to load the latches with the ! 423: ; existing contents of display memory to allow the ALUs to work). ! 424: cmp byte ptr jALUFunc,DR_SET ;is it a replace-type rop? ! 425: jz short is_replace_type ;yes ! 
426: ;no, set up for non-replace whole bytes ! 427: mov ecx,offset non_replace_wide ! 428: ! 429: cmp ebx,MAX_NON_REPLACE_SPECIAL ;too wide to special case? ! 430: jb short non_replace_spec ;nope ! 431: ! 432: lea eax,pfnDrawRWWideEntry ;assume too wide to special-case ! 433: mov pfnWholeBytes,eax ; table for width ! 434: ! 435: jmp short start_vec_set ! 436: ! 437: align 4 ! 438: non_replace_spec: ! 439: ! 440: mov eax,pfnWholeBytesNonReplace[ebx*4] ;no, point to entry ! 441: mov pfnWholeBytes,eax ; table for width ! 442: mov ulSpecialBytes,ebx ! 443: ;narrow enough to special case. Look up ! 444: ; the entry table for the special case ! 445: ; base on the start alignment ! 446: ! 447: jmp short start_vec_set ! 448: ! 449: align 4 ! 450: is_replace_type: ;set up for replace-type rop ! 451: cmp ebx,MAX_REPLACE_SPECIAL ;too wide to special case? ! 452: jnb short is_wide_replace ;yes ! 453: ! 454: mov ulSpecialBytes,ebx ! 455: ;narrow enough to special case. Look up ! 456: ; the entry table for the special case ! 457: ; base on the start alignment ! 458: mov ecx,ulRowOffset ! 459: add ecx,ulLeftEdgeAdjust ;left edge whole bytes start offset ! 460: and ecx,01b ;left edge whole bytes start alignment ! 461: ; MOD 2 ! 462: mov ecx,pfnWholeBytesSpecial[ecx*4] ;look up table of entry ! 463: ; tables for alignment ! 464: mov ecx,[ecx+ebx*4] ;look up entry table for width ! 465: mov pfnWholeBytes,ecx ; table for width ! 466: mov ecx,offset whole_bytes_rep_wide ! 467: ! 468: jmp short start_vec_set ! 469: ! 470: align 4 ! 471: is_wide_replace: ;set up for wide replace-type op ! 472: ;Note: assumes there is at least one ! 473: ; full word involved! ! 474: mov ecx,ulRowOffset ! 475: add ecx,ulLeftEdgeAdjust ;left edge whole bytes start offset ! 476: neg ecx ! 477: and ecx,01b ! 478: mov edx,ebx ! 479: sub edx,ecx ;ignore odd leading bytes ! 480: mov eax,edx ! 481: shr edx,1 ;# of whole words across (not counting ! 482: ; odd leading & trailing bytes) ! 
483: mov ulWholeWords,edx ! 484: and eax,01b ;# of odd (fractional) trailing bytes ! 485: add ecx,ecx ! 486: or ecx,eax ;build a look-up index from the number ! 487: ; of leading and trailing bytes ! 488: mov ecx,pfnWideWholeRep[ecx*4] ;proper drawing handler for front/ ! 489: mov pfnWholeBytes,ecx ; back alignment ! 490: mov ecx,offset whole_bytes_rep_wide ! 491: ;set up to call routine to perform wide ! 492: ; whole bytes fill ! 493: ! 494: start_vec_set: ! 495: mov pfnStartDrawing,ecx ; all partial (non-whole) edges ! 496: ! 497: mov ecx,pdsurf ! 498: mov eax,[ecx].dsurf_lNextScan ! 499: mov ulScanWidth,eax ;local copy of scan line width ! 500: sub eax,ebx ;EAX = delta to next scan ! 501: mov ulNextScan,eax ! 502: ! 503: mov esi,pBrush ! 504: mov eax,[esi+oem_brush_height] ! 505: dec eax ! 506: mov ulVbYRound,eax ! 507: mov al,[esi + oem_brush_yshft] ; blind to the next. ! 508: mov ulVbYShift,eax ! 509: ! 510: mov cl,al ! 511: mov eax,UlScanWidth ! 512: shl eax,cl ;ulNextScan * 8 ! 513: mov ulVbNextScan,eax ; ! 514: ! 515: cmp fdInvertDestFirst,1 ;is this an invert-dest-plus-something- ! 516: ; else rop that requires two passes? ! 517: jnz short do_single_pass ! 518: ! 519: lea eax,vTrgBlt@20 ! 520: ptrCall <eax>,<pdsurf, culRcl, prcl, R2_NOT, -1> ! 521: ! 522: mov ah,byte ptr jALUFunc ;reset the ALU logical function ! 523: mov edx,VGA_BASE + GRAF_ADDR ! 524: mov al,GRAF_DATA_ROT ! 525: SLOW_OUT ;set the ALU logical function ! 526: ! 527: do_single_pass: ! 528: call draw_banks ! 529: ! 530: ;-----------------------------------------------------------------------; ! 531: ; See if there are any more rectangles to fill. ! 532: ;-----------------------------------------------------------------------; ! 533: ! 534: add prcl,(size RECTL) ;point to the next rectangle, if there is one ! 535: dec culRcl ;count down the rectangles to fill ! 536: jnz fill_rect_loop ! 537: ! 538: ! 539: ;-----------------------------------------------------------------------; ! 
540: ; We have filled all rectangles. Restore the VGA to its default state. ! 541: ;-----------------------------------------------------------------------; ! 542: ! 543: mov edx,VGA_BASE + GRAF_ADDR ! 544: mov eax,0000h + GRAF_ENAB_SR ;disable set/reset ! 545: out dx,ax ! 546: mov eax,GRAF_MODE + ((M_PROC_WRITE + M_DATA_READ) SHL 8) ! 547: out dx,ax ;restore read mode 0 and write mode 0 ! 548: mov eax,(DR_SET shl 8) + GRAF_DATA_ROT ;set the logical function to ! 549: out dx,ax ; SET ! 550: vMonoPatBlts_done: ! 551: cRet vMonoPatBlt ! 552: ! 553: ;-----------------------------------------------------------------------; ! 554: ; Fills all banks in the current fill rectangle. Called once per fill ! 555: ; rectangle, except for destination-inversion-plus-something-else rops. ! 556: ;-----------------------------------------------------------------------; ! 557: ! 558: align 4 ! 559: draw_banks: ! 560: ! 561: ;-----------------------------------------------------------------------; ! 562: ; Map in the bank containing the top scan to fill, if it's not mapped in ! 563: ; already. ! 564: ;-----------------------------------------------------------------------; ! 565: ! 566: mov edi,prcl ;point to rectangle to fill ! 567: mov ecx,pdsurf ;point to surface ! 568: mov eax,[edi].yTop ;top scan line of fill ! 569: mov ulCurrentTopScan,eax ;this will be the fill top in 1st bank ! 570: ! 571: cmp eax,[ecx].dsurf_rcl1WindowClip.yTop ;is fill top less than ! 572: ; current bank? ! 573: jl short map_init_bank ;yes, map in proper bank ! 574: cmp eax,[ecx].dsurf_rcl1WindowClip.yBottom ;fill top greater than ! 575: ; current bank? ! 576: jl short init_bank_mapped ;no, proper bank already mapped ! 577: map_init_bank: ! 578: ! 579: ; Map in the bank containing the top scan line of the fill. ! 580: ! 581: ptrCall <dword ptr [ecx].dsurf_pfnBankControl>,<ecx,eax,JustifyTop> ! 582: ! 583: init_bank_mapped: ! 584: ! 
585: ;-----------------------------------------------------------------------; ! 586: ; Main loop for processing fill in each bank. ! 587: ;-----------------------------------------------------------------------; ! 588: ! 589: ; Compute the starting address and scan line count for the initial bank. ! 590: ! 591: mov eax,pdsurf ;EAX->target surface ! 592: mov ebx,ulBottomScan ;bottom of destination rectangle ! 593: cmp ebx,[eax].dsurf_rcl1WindowClip.yBottom ! 594: ;which comes first, the bottom of the ! 595: ; dest rect or the bottom of the ! 596: ; current bank? ! 597: jl short BottomScanSet ;fill bottom comes first, so draw to ! 598: ; that; this is the last bank in fill ! 599: mov ebx,[eax].dsurf_rcl1WindowClip.yBottom ! 600: ;bank bottom comes first; draw to ! 601: ; bottom of bank ! 602: BottomScanSet: ! 603: mov edi,ulCurrentTopScan ;top scan line to fill in current bank ! 604: sub ebx,edi ;# of scans to fill in bank ! 605: imul edi,ulScanWidth ;offset of starting scan line ! 606: ! 607: ; Note that the start of the bitmap will change each time through the ! 608: ; bank loop, because the start of the bitmap is varied to map the ! 609: ; desired scan line to the banking window. ! 610: ! 611: add edi,[eax].dsurf_pvBitmapStart ;start of scan in bitmap ! 612: add edi,ulRowOffset ;EDI = start offset of fill in bitmap ! 613: ! 614: ; We have computed the starting address and scan count. Time to start drawing ! 615: ; in the initial bank. ! 616: ! 617: mov esi,pBrush ;edx = min(PatternHeight,BltHeight) ! 618: mov ecx,[esi + oem_brush_height] ! 619: sub ecx,ebx ! 620: sbb edx,edx ! 621: and edx,ecx ! 622: add edx,ebx ! 623: mov ulVbBlindCount,edx ! 624: ! 625: ; Brush alignment. We need to look at pptlBrush ! 626: ! 627: mov eax,ulCurrentTopScan ;top scan line to fill in current bank ! 628: sub eax,ulPatternOrgY ; ! 629: ! 630: jns short pos_y_offset ; ! 631: neg eax ; ! 632: and eax,7 ;-eax mod 8 ! 633: neg eax ; ! 634: add eax,8 ; ! 635: jmp short save_pat_pointer ! 
636: pos_y_offset: ! 637: and eax,7 ;eax mod 8 ! 638: save_pat_pointer: ! 639: add eax,eax ;Y Offset * PatternWidth (2 bytes) ! 640: ! 641: lea edx,RotatedPat ;Pattern Dest ! 642: add eax,edx ! 643: mov pulPattern,eax ;Drawing code uses this as the ! 644: ;source for the pattern ! 645: ! 646: jmp pfnStartDrawing ! 647: ! 648: ! 649: ;-----------------------------------------------------------------------; ! 650: ; Whole byte fills. ! 651: ;-----------------------------------------------------------------------; ! 652: ! 653: ;-----------------------------------------------------------------------; ! 654: ; Handles non-replace whole byte fills wider than the maximum special ! 655: ; case width. ! 656: ; ! 657: ; The destination is not involved, so a STOS (or equivalent) can be used ! 658: ; (no read needed before write). ! 659: ;-----------------------------------------------------------------------; ! 660: ! 661: align 4 ! 662: public whole_bytes_rep_wide ! 663: whole_bytes_rep_wide: ! 664: push ebx ;save scan count ! 665: push edi ;save starting address ! 666: ! 667: add edi,ulLeftEdgeAdjust ;point to first whole byte to fill ! 668: ! 669: mov edx,VGA_BASE + GRAF_ADDR ! 670: mov eax,GRAF_MODE + ((M_COLOR_WRITE + M_COLOR_READ) SHL 8) ! 671: out dx,ax ;write mode 2 ! 672: mov eax,ulBkClr ;Set the write mode to write mode ! 673: mov [edi],al ; three after we load the latches ! 674: mov al,[edi] ; with our background color ! 675: ! 676: mov al,GRAF_SET_RESET ;Set the foreground color ! 677: out dx,al ; into set/reset ! 678: inc edx ! 679: in al,dx ! 680: and eax,0f0h ! 681: or eax,ulFgClr ! 682: out dx,al ! 683: dec edx ! 684: ! 685: mov eax,GRAF_MODE + ((M_AND_WRITE + M_COLOR_READ) SHL 8) ! 686: out dx,ax ;write mode 3 so we can do the masking ! 687: ; without OUTs, read mode 1 so we can ! 688: ; read 0xFF from memory always, for ! 689: ; ANDing (because Color Don't Care is ! 690: ; all zeros) ! 691: ! 692: mov esi,pulPattern ; pointer to pattern bits ! 
693: mov ax,[esi] ; into place ! 694: add esi,2 ! 695: mov pulVbPattern,esi ! 696: ! 697: ! 698: mov ulVbTopScan,ebx ;our pattern is 8 high so we don't ! 699: add ebx,ulVbYRound ;Calc the number of lines to do ! 700: mov ecx,ulVbyShift ! 701: shr ebx,cl ;only need to go through the code ! 702: ; count/8 times. We will handle any ! 703: ; extra lines at the bottom ! 704: ; (ulVbTopScan mod 8) in our loops. ! 705: mov edx,pfnWholeBytes ! 706: push ulVbBlindCount ! 707: ! 708: public wide_bytes_loop ! 709: wide_bytes_loop: ! 710: SET_UP_UNROLL_VARS ebx, ecx, ebx, [edx], LOOP_UNROLL_SHIFT ! 711: ! 712: mov esi,ulWholeWords ;number of aligned word writes ! 713: mov edx,ulVbNextScan ;offset from end of one scan line to ! 714: ; start of next the same scan line ! 715: ; in the next pattern. ! 716: sub edx,ulWholeBytes ! 717: add edx,ulSpecialBytes ! 718: ! 719: ! 720: ; eax = rotated pattern ! 721: ; ebx = unrolled count ! 722: ; ecx = routine address ! 723: ; edx = ulVbNextScan ! 724: ; esi = ulFvWholeWords ! 725: ; edi = pDest ! 726: ; ! 727: push edi ;save out dest pointer ! 728: call ecx ;draw the wide whole bytes ! 729: pop edi ;restore out dest pointer ! 730: ! 731: add edi,ulScanWidth ;advance to next scan line ! 732: ! 733: dec ulVbBlindCount ! 734: jz short wide_bytes_end ! 735: ! 736: mov eax,ulVbTopScan ;restore scan count ! 737: dec eax ;Subtract off completed top line ! 738: mov ulVbTopScan,eax ! 739: add eax,ulVbYRound ;Calc the number of lines to do ! 740: mov ecx,ulVbyShift ! 741: shr eax,cl ;for this venetian blind pass ! 742: mov ebx,eax ;including any partial patterns ! 743: ; at the bottom ! 744: ! 745: mov esi,pulVbPattern ;Pattern data ! 746: mov ax,[esi] ;get pattern word ! 747: add esi,2 ! 748: mov pulVbPattern,esi ;save pattern pointer for later ! 749: ! 750: mov edx,pfnWholeBytes ! 751: ! 752: jmp short wide_bytes_loop ! 753: ! 754: wide_bytes_end: ! 755: pop ulVbBlindCount ! 756: pop edi ;restore screen pointer ! 
757: pop ebx ;restore fill scan count ! 758: ! 759: mov edx,VGA_BASE + GRAF_ADDR ;restore proper read/write modes ! 760: mov eax,GRAF_MODE + ((M_PROC_WRITE + M_DATA_READ) SHL 8) ! 761: out dx,ax ! 762: ! 763: jmp pfnContinueDrawing ;either keep drawing or we're done ! 764: ! 765: ! 766: ;-----------------------------------------------------------------------; ! 767: ; Handle case where both edges are partial (non-whole) bytes. ! 768: ;-----------------------------------------------------------------------; ! 769: ! 770: align 4 ! 771: public non_replace_wide ! 772: non_replace_wide: ! 773: push ebx ;Save line count ! 774: push edi ;Save Dest Addr ! 775: ! 776: add edi,ulLeftEdgeAdjust ;point to first whole byte to fill ! 777: ! 778: lea eax,do_wide_wes_trick ! 779: mov pfnWesTrick,eax ! 780: ! 781: mov ecx,ulFgClr ! 782: xor ecx,ulBkClr ;mask = ulBkClr ^ ulFgClr ! 783: ! 784: mov ah,cl ;sre = !mask ! 785: not ah ;Set/Reset Enable ! 786: mov edx,EGA_BASE+GRAF_ADDR ! 787: mov al,GRAF_ENAB_SR ! 788: out dx,ax ;Set Set/Reset Enable bits ! 789: ! 790: mov ah,byte ptr ulBkClr ;Set/Reset = background color ! 791: mov al,GRAF_SET_RESET ! 792: out dx,ax ! 793: ! 794: mov eax,GRAF_MODE + ((M_PROC_WRITE + M_COLOR_READ) SHL 8) ! 795: out dx,ax ; Set Read Mode 0 ! 796: ! 797: ;save the width count and pfn here ! 798: ! 799: call wes_trick ! 800: ! 801: mov edx,EGA_BASE+SEQ_DATA ! 802: mov eax,0fh ! 803: out dx,al ! 804: ! 805: mov edx,EGA_BASE+GRAF_ADDR ! 806: mov eax,GRAF_MODE + ((M_PROC_WRITE + M_DATA_READ) SHL 8) ! 807: out dx,ax ! 808: ! 809: mov eax,GRAF_ENAB_SR ! 810: out dx,ax ;Reset Set/Reset Enable bits ! 811: ! 812: pop edi ! 813: pop ebx ! 814: ! 815: jmp pfnContinueDrawing ;either keep drawing or we're done ! 816: ! 817: ;-----------------------------------------------------------------------; ! 818: ; Process any left/right columns that that have to be done. ! 819: ; ! 820: ; Currently: ! 821: ; EBX = height to fill, in scans ! 
822: ; EDI --> first byte of left edge ! 823: ;-----------------------------------------------------------------------; ! 824: ! 825: ! 826: ;-----------------------------------------------------------------------; ! 827: ; Handle case where both edges are partial (non-whole) bytes. ! 828: ;-----------------------------------------------------------------------; ! 829: ! 830: align 4 ! 831: public edge_byte_setup ! 832: edge_byte_setup: ! 833: lea eax,do_edge_wes_trick ! 834: mov pfnWesTrick,eax ! 835: ! 836: mov ecx,ulFgClr ! 837: xor ecx,ulBkClr ;mask = ulBkClr ^ ulFgClr ! 838: ! 839: mov ah,cl ;sre = !mask ! 840: not ah ;Set/Reset Enable ! 841: mov edx,EGA_BASE+GRAF_ADDR ! 842: mov al,GRAF_ENAB_SR ! 843: out dx,ax ;Set Set/Reset Enable bits ! 844: ! 845: mov ah,byte ptr ulBkClr ;Set/Reset = foreground color ! 846: mov al,GRAF_SET_RESET ! 847: out dx,ax ! 848: ! 849: mov eax,ulLeftEdgeAdjust ! 850: or eax,eax ! 851: jz short do_right_edge ! 852: ! 853: mov eax,ulMasks ;Get Left/Right edge Masks ! 854: mov ah,al ! 855: mov al,GRAF_BIT_MASK ! 856: mov edx,EGA_BASE+GRAF_ADDR ! 857: out dx,ax ! 858: ! 859: inc pulPattern ;Adjust Pattern rotation ! 860: ! 861: push ebx ;Save line count ! 862: push edi ;Save Dest Addr ! 863: call wes_trick ! 864: pop edi ! 865: pop ebx ! 866: ! 867: mov eax,ulMasks ;restore Left/Right edge Masks ! 868: dec pulPattern ;Adjust Pattern rotation ! 869: ! 870: do_right_edge: ! 871: mov eax,ulMasks ;Get Left/Right edge Masks ! 872: and ah,0ffh ! 873: jz edge_done ! 874: ! 875: mov al,GRAF_BIT_MASK ! 876: mov edx,EGA_BASE+GRAF_ADDR ! 877: out dx,ax ! 878: ! 879: add edi,ulLeftEdgeAdjust ;point to first whole byte to fill ! 880: add edi,ulWholeBytes ;point to right edge byte to fill ! 881: call wes_trick ! 882: ! 883: edge_done: ! 884: mov edx,EGA_BASE+SEQ_DATA ! 885: mov eax,0fh ! 886: out dx,al ! 887: ! 888: mov edx,EGA_BASE+GRAF_ADDR ! 889: mov eax,GRAF_BIT_MASK+0ff00h ! 890: out dx,ax ! 891: ! 892: mov eax,GRAF_ENAB_SR ! 
893: out dx,ax ;Reset Set/Reset Enable bits ! 894: ! 895: ! 896: ;-----------------------------------------------------------------------; ! 897: ; See if there are any more banks to process. ! 898: ;-----------------------------------------------------------------------; ! 899: ! 900: public check_next_bank ! 901: check_next_bank: ! 902: ! 903: mov edi,pdsurf ! 904: mov eax,[edi].dsurf_rcl1WindowClip.yBottom ;is the fill bottom in ! 905: cmp ulBottomScan,eax ; the current bank? ! 906: jle short banks_done ;yes, so we're done ! 907: ;no, map in the next bank and fill it ! 908: mov ulCurrentTopScan,eax ;remember where the top of the bank ! 909: ; we're about to map in is (same as ! 910: ; bottom of bank we just did) ! 911: ! 912: ptrCall <dword ptr [edi].dsurf_pfnBankControl>,<edi,eax,JustifyTop> ! 913: ;map in the bank ! 914: ! 915: ; Compute the starting address and scan line count in this bank. ! 916: ! 917: mov eax,pdsurf ;EAX->target surface ! 918: mov ebx,ulBottomScan ;bottom of destination rectangle ! 919: cmp ebx,[eax].dsurf_rcl1WindowClip.yBottom ! 920: ;which comes first, the bottom of the ! 921: ; dest rect or the bottom of the ! 922: ; current bank? ! 923: jl short BottomScanSet2 ;fill bottom comes first, so draw to ! 924: ; that; this is the last bank in fill ! 925: mov ebx,[eax].dsurf_rcl1WindowClip.yBottom ! 926: ;bank bottom comes first; draw to ! 927: ; bottom of bank ! 928: BottomScanSet2: ! 929: mov edi,ulCurrentTopScan ;top scan line to fill in current bank ! 930: sub ebx,edi ;# of scans to fill in bank ! 931: imul edi,ulScanWidth ;offset of starting scan line ! 932: ! 933: ; Note that the start of the bitmap will change each time through the ! 934: ; bank loop, because the start of the bitmap is varied to map the ! 935: ; desired scan line to the banking window. ! 936: ! 937: add edi,[eax].dsurf_pvBitmapStart ;start of scan in bitmap ! 938: add edi,ulRowOffset ;EDI = start offset of fill in bitmap ! 939: ! 
; We have computed the starting address and scan count. Time to start drawing
; in the initial bank.

        mov     esi,pBrush              ;edx = min(PatternHeight,BltHeight)
        mov     ecx,[esi + oem_brush_height]
        sub     ecx,ebx                 ;CF set if brush height < scan count
        sbb     edx,edx                 ;edx = -1 if CF set, else 0
        and     edx,ecx
        add     edx,ebx
        mov     ulVbBlindCount,edx

; Brush alignment. We need to look at pptlBrush

        mov     eax,ulCurrentTopScan    ;top scan line to fill in current bank
        sub     eax,ulPatternOrgY       ;

        jns     short pos_y_offset1     ;
        neg     eax                     ;
        and     eax,7                   ;-eax mod 8
        neg     eax                     ;
        add     eax,8                   ;
        jmp     short save_pat_pointer1
pos_y_offset1:
        and     eax,7                   ;eax mod 8
save_pat_pointer1:
        add     eax,eax                 ;Y Offset * PatternWidth (2 bytes)

        lea     edx,RotatedPat          ;Pattern Dest
        add     eax,edx
        mov     pulPattern,eax          ;Drawing code uses this as the
                                        ;source for the pattern

; Draw in the new bank.

        jmp     pfnStartDrawing


;-----------------------------------------------------------------------;
; Done with all banks in this fill.

        public banks_done
banks_done:
        PLAIN_RET

endProc vMonoPatBlt

;----------------------------------------------------------------------------
; Wes Trick Setup code. This code decides if this is a one or a two pass
; operation.
;
; Entry:
;       EBX = line count (saved/restored around the first pass)
;       EDI = dest pointer (saved/restored around the first pass)
;       pfnWesTrick = address of the drawing routine to run for each pass
;
; The first (foreground) pass is made with CALL; the second (background)
; pass is entered with JMP, so the drawing routine's RET returns directly
; to wes_trick's caller.
;----------------------------------------------------------------------------
        align 4
        public wes_trick
wes_trick:
        mov     esi,pfnWesTrick         ;routine that draws one pass

        mov     ecx,ulFgClr             ;
        mov     eax,ecx                 ;AL = low byte of ulFgClr
        xor     ecx,ulBkClr             ;mask = ulBkClr ^ ulFgClr

        mov     edx,EGA_BASE+SEQ_DATA   ;Index should be pointing to the
                                        ; plane mask (2)
        mov     ch,cl
        not     ch                      ;Set/Reset Enable bits
        and     cl,al                   ;ulFgdColor & mask
        or      cl,al                   ;(cl & al) | al == al, so CL ends up
                                        ; equal to AL and ZF reflects AL
                                        ; NOTE(review): confirm this is the
                                        ; intended test
        jz      short check_bk_bits     ;if zero - one background pass

        mov     ulVbMask,0              ;We do not want to invert the
                                        ;foreground pass
        or      ch,cl
        mov     al,ch
        out     dx,al                   ;Enable Planes for First Pass

        push    ecx                     ;CL is popped into AL below
        push    edi                     ;Save our Dest pointer
        push    ebx                     ;Save our count
        call    esi                     ;Draw the foreground pass
        pop     ebx                     ;restore the line count
        pop     edi                     ;restore the dest pointer
        pop     eax                     ;Restore bk mask
        mov     esi,pfnWesTrick         ;reload; the drawing routines in this
                                        ; file overwrite ESI


check_bk_bits:
        not     al                      ;planes not covered by AL
        and     al,MM_ALL
        jnz     short @f
        ret                             ;no planes left - no second pass
@@:
        mov     ulVbMask,-1             ;Pattern is XORed with this mask by the
                                        ; drawing routine, i.e. inverted for
                                        ; the background pass
        mov     edx,EGA_BASE+SEQ_DATA   ;Index should be pointing to the
                                        ; plane mask (2)
        out     dx,al                   ;Enable planes for the second pass
        jmp     esi                     ;tail-jump into the drawing routine

;--------------------------------------------------------------------------
; Do the edges here.
;
; Draws one byte column. The outer loop runs ulVbBlindCount times; each
; iteration loads the next pattern word, draws every scan that uses it
; through the unrolled 1-wide read/write loop, then moves the start
; address down one scan (ulScanWidth). ulVbBlindCount is preserved across
; the routine with the push/pop pair.
;--------------------------------------------------------------------------

        align 4
        public do_edge_wes_trick
do_edge_wes_trick:
; ebx = line count
; edi = dest

        mov     ulVbTopScan,ebx         ;remember the remaining scan count
        add     ebx,ulVbYRound          ;Calc the number of lines to do:
        mov     ecx,ulVbyShift          ; (count + ulVbYRound) >> ulVbyShift
        shr     ebx,cl

        mov     esi,pulPattern
        mov     ax,[esi]                ;get pattern into place
        add     esi,2                   ;patterns stored as words
        xor     eax,ulVbMask            ;Invert the pattern if we are doing
                                        ; a background pass

        push    ulVbBlindCount          ;the loop below counts this down
; Set up variables for entering unrolled loop.
wes_trick_loop:
        SET_UP_UNROLL_VARS ebx,edx, ebx,pfnDraw1WideEntry, LOOP_UNROLL_SHIFT

        mov     ecx,ulVbNextScan        ;offset from one scan to next

        push    edi                     ;save dest pointer
        call    edx                     ;jump into the unrolled loop to draw
        pop     edi                     ;restore dest pointer

        add     edi,ulScanWidth         ;move to next scan line

        dec     ulVbBlindCount
        jz      short wes_trick_loop_done ;jz if we are finished

        mov     eax,ulVbTopScan         ;restore scan count
        dec     eax                     ;Subtract off completed top line
        mov     ulVbTopScan,eax         ;save for next loop
        add     eax,ulVbYRound          ;Calc the number of lines to do
        mov     ecx,ulVbyShift          ;for this venetian blind pass
        shr     eax,cl                  ;including any partial patterns
        mov     ebx,eax                 ;at the bottom


        mov     ax,[esi]                ;next pattern word
        add     esi,2                   ;point to the next pattern line
        xor     eax,ulVbMask            ;Invert the pattern if we are doing
                                        ; a background pass

        jmp     short wes_trick_loop

        align 4
wes_trick_loop_done:
        pop     ulVbBlindCount          ;restore the caller's blind count
        ret

;--------------------------------------------------------------------------
; Do the middle bytes here for blts with rops.
;
; Same outer loop as do_edge_wes_trick, but the drawing routine is taken
; from the entry table addressed by pfnWholeBytes, and the pattern pointer
; is kept in pulVbPattern because ESI is needed for the byte count.
;
; Registers handed to the drawing routine:
;       AL  = pattern byte (XORed with ulVbMask)
;       EBX = unrolled loop count
;       EDX = ulVbNextScan - ulWholeBytes + ulSpecialBytes
;       ESI = ulWholeBytes
;       EDI = dest
;
; NOTE(review): only AL is loaded from the pattern here, so AH is not
; pattern data on entry to the drawing routine; presumably pfnWholeBytes
; only selects routines that use AL alone -- confirm against the code that
; sets pfnWholeBytes.
;--------------------------------------------------------------------------

        align 4
        public do_wide_wes_trick
do_wide_wes_trick:
; ebx = line count
; edi = dest

        mov     ulVbTopScan,ebx         ;remember the remaining scan count
        add     ebx,ulVbYRound          ;Calc the number of lines to do:
        mov     ecx,ulVbyShift          ; (count + ulVbYRound) >> ulVbyShift
        shr     ebx,cl

        mov     esi,pulPattern
        mov     al,[esi]                ;get pattern into place
        add     esi,2                   ;patterns stored as words
        mov     pulVbPattern,esi        ;ESI is reused below
        xor     eax,ulVbMask            ;Invert the pattern if we are doing
                                        ; a background pass

        push    ulVbBlindCount          ;the loop below counts this down

        mov     edx,pfnWholeBytes       ;table of unrolled-loop entry points

; Set up variables for entering unrolled loop.
wide_wes_trick_loop:

        SET_UP_UNROLL_VARS ebx,ecx,ebx,[edx], LOOP_UNROLL_SHIFT

        mov     esi,ulWholeBytes
        mov     edx,ulVbNextScan        ;offset from one scan to next
        sub     edx,esi
        add     edx,ulSpecialBytes

        push    edi                     ;save dest pointer
        call    ecx                     ;jump into the unrolled loop to draw
        pop     edi                     ;restore dest pointer

        add     edi,ulScanWidth         ;move to next scan line

        dec     ulVbBlindCount
        jz      short wide_wes_trick_loop_done ;jz if we are finished

        mov     eax,ulVbTopScan         ;restore scan count
        dec     eax                     ;Subtract off completed top line
        mov     ulVbTopScan,eax         ;save for next loop
        add     eax,ulVbYRound          ;Calc the number of lines to do
        mov     ecx,ulVbyShift          ;for this venetian blind pass
        shr     eax,cl                  ;including any partial patterns
        mov     ebx,eax                 ;at the bottom


        mov     esi,pulVbPattern
        mov     al,[esi]                ;get next pattern byte (only AL is
                                        ; loaded)
        add     esi,2                   ;point to the next pattern line
        mov     pulVbPattern,esi
        xor     eax,ulVbMask            ;Invert the pattern if we are doing
                                        ; a background pass

        mov     edx,pfnWholeBytes       ;EDX was used for the scan offset
        jmp     short wide_wes_trick_loop

        align 4
wide_wes_trick_loop_done:
        pop     ulVbBlindCount          ;restore the caller's blind count
        ret

;-----------------------------------------------------------------------;
; Unrolled loops.
;-----------------------------------------------------------------------;

;-----------------------------------------------------------------------;
; Unrolled loop stuff for wide replace-type rops (arbitrary width).
;-----------------------------------------------------------------------;

; Tables of entry points into unrolled wide write-only loops.
        UNROLL_LOOP_ENTRY_TABLE pfnDrawWide00Entry,W00,LOOP_UNROLL_COUNT
        UNROLL_LOOP_ENTRY_TABLE pfnDrawWide01Entry,W01,LOOP_UNROLL_COUNT
        UNROLL_LOOP_ENTRY_TABLE pfnDrawWide10Entry,W10,LOOP_UNROLL_COUNT
        UNROLL_LOOP_ENTRY_TABLE pfnDrawWide11Entry,W11,LOOP_UNROLL_COUNT

;-----------------------------------------------------------------------;
; Macro to draw n bytes, 0 leading bytes, 0 trailing bytes, then advance
; to next scan line.
; DRAW_WIDE_00: one unrolled scan consisting of ESI word stores from AX,
; with no separate leading or trailing byte.

DRAW_WIDE_00 macro ENTRY_LABEL,ENTRY_INDEX
&ENTRY_LABEL&ENTRY_INDEX&:
        mov     ecx,esi                 ;# of whole words
        rep     stosw                   ;fill all whole bytes as words
        add     edi,edx                 ;point to the next scan line
        endm    ;-----------------------------------;

; N-wide write-only, 0 leading bytes, 0 trailing bytes.
;  AX  = pattern word stored by REP STOSW
;  EBX = unrolled loop count (decremented once per pass through the
;        unrolled block below)
;  EDX = offset from end of one scan's fill to start of next similar line
;  ESI = number of words to store per scan (copied to ECX each scan)
;  EDI = target address to fill
;
;  EBX, ECX, EDI modified.

        align 4
draw_wide_00_loop proc near
        UNROLL_LOOP DRAW_WIDE_00,W00,LOOP_UNROLL_COUNT
        dec     ebx                     ;another unrolled block done
        jnz     draw_wide_00_loop

        ret

draw_wide_00_loop endp


;-----------------------------------------------------------------------;
; Macro to draw n bytes, 0 leading bytes, 1 trailing byte, then advance
; to next scan line.

DRAW_WIDE_01 macro ENTRY_LABEL,ENTRY_INDEX
&ENTRY_LABEL&ENTRY_INDEX&:
        mov     ecx,esi                 ;# of whole words
        rep     stosw                   ;fill all whole bytes as words
        mov     [edi],al                ;trailing byte
        inc     edi                     ;step past the trailing byte
        add     edi,edx                 ;point to the next scan line
        endm    ;-----------------------------------;

; N-wide write-only, 0 leading bytes, 1 trailing byte.
;  AX  = pattern word stored by REP STOSW; AL is also the trailing byte
;  EBX = unrolled loop count (decremented once per pass through the
;        unrolled block below)
;  EDX = offset from end of one scan's fill to start of next similar line
;  ESI = number of words to store per scan (copied to ECX each scan)
;  EDI = target address to fill
;
;  EBX, ECX, EDI modified.

        align 4
draw_wide_01_loop proc near
        UNROLL_LOOP DRAW_WIDE_01,W01,LOOP_UNROLL_COUNT
        dec     ebx                     ;another unrolled block done
        jnz     draw_wide_01_loop
        ret

draw_wide_01_loop endp

;-----------------------------------------------------------------------;
; Macro to draw n bytes, 1 leading byte, 0 trailing bytes, then advance
; to next scan line.

DRAW_WIDE_10 macro ENTRY_LABEL,ENTRY_INDEX
&ENTRY_LABEL&ENTRY_INDEX&:
        mov     [edi],ah                ;do leading byte
        inc     edi                     ;advance pointer
        mov     ecx,esi                 ;# of whole words
        rep     stosw                   ;fill all whole bytes as words
        add     edi,edx                 ;point to the next scan line
        endm    ;-----------------------------------;

; N-wide write-only, 1 leading byte, 0 trailing bytes.
;  AX  = pattern word stored by REP STOSW; AH is also the leading byte
;  EBX = unrolled loop count (decremented once per pass through the
;        unrolled block below)
;  EDX = offset from end of one scan's fill to start of next similar line
;  ESI = number of words to store per scan (copied to ECX each scan)
;  EDI = target address to fill
;
;  EBX, ECX, EDI modified.

        align 4
draw_wide_10_loop proc near
        UNROLL_LOOP DRAW_WIDE_10,W10,LOOP_UNROLL_COUNT
        dec     ebx                     ;another unrolled block done
        jnz     draw_wide_10_loop
        ret

draw_wide_10_loop endp

;-----------------------------------------------------------------------;
; Macro to draw n bytes, 1 leading byte, 1 trailing byte, then advance
; to next scan line.

DRAW_WIDE_11 macro ENTRY_LABEL,ENTRY_INDEX
&ENTRY_LABEL&ENTRY_INDEX&:
        mov     [edi],ah                ;do leading byte
        inc     edi                     ;advance pointer
        mov     ecx,esi                 ;# of whole words
        rep     stosw                   ;fill all whole bytes as words
        mov     [edi],al                ;trailing byte
        inc     edi                     ;step past the trailing byte
        add     edi,edx                 ;point to the next scan line
        endm    ;-----------------------------------;

; N-wide write-only, 1 leading byte, 1 trailing byte.
;  AX  = pattern word stored by REP STOSW; AH = leading, AL = trailing byte
;  EBX = unrolled loop count
;  EDX = offset from end of one scan's fill to start of next similar line
;  ESI = number of words to store per scan (copied to ECX each scan)
1284: ; EDI = target address to fill ! 1285: ! 1286: align 4 ! 1287: draw_wide_11_loop proc near ! 1288: UNROLL_LOOP DRAW_WIDE_11,W11,LOOP_UNROLL_COUNT ! 1289: dec ebx ! 1290: jnz draw_wide_11_loop ! 1291: ret ! 1292: ! 1293: draw_wide_11_loop endp ! 1294: ! 1295: ;-----------------------------------------------------------------------; ! 1296: ; Unrolled drawing stuff (unrolled to reduce jumps to speed things up), ! 1297: ; for cases where read before write is NOT required. ! 1298: ;-----------------------------------------------------------------------; ! 1299: ! 1300: ; Tables of entry points into unrolled 1-, 2-, 3-, and 4-wide write-only loops. ! 1301: ; Note that there may be separate entry tables for various alignments of a ! 1302: ; specific width, in cases where performance can be improved by using different ! 1303: ; code for different alignments. ! 1304: ! 1305: UNROLL_LOOP_ENTRY_TABLE pfnDraw1WideEvenEntry,W1_EVEN,LOOP_UNROLL_COUNT ! 1306: UNROLL_LOOP_ENTRY_TABLE pfnDraw1WideOddEntry,W1_ODD,LOOP_UNROLL_COUNT ! 1307: UNROLL_LOOP_ENTRY_TABLE pfnDraw2WideEvenEntry,W2_EVEN,LOOP_UNROLL_COUNT ! 1308: UNROLL_LOOP_ENTRY_TABLE pfnDraw2WideOddEntry,W2_ODD,LOOP_UNROLL_COUNT ! 1309: UNROLL_LOOP_ENTRY_TABLE pfnDraw3WideEvenEntry,W3_EVEN,LOOP_UNROLL_COUNT ! 1310: UNROLL_LOOP_ENTRY_TABLE pfnDraw3WideOddEntry,W3_ODD,LOOP_UNROLL_COUNT ! 1311: UNROLL_LOOP_ENTRY_TABLE pfnDraw4WideEvenEntry,W4_EVEN,LOOP_UNROLL_COUNT ! 1312: UNROLL_LOOP_ENTRY_TABLE pfnDraw4WideOddEntry,W4_ODD,LOOP_UNROLL_COUNT ! 1313: UNROLL_LOOP_ENTRY_TABLE pfnDraw5WideEvenEntry,W5_EVEN,LOOP_UNROLL_COUNT ! 1314: UNROLL_LOOP_ENTRY_TABLE pfnDraw5WideOddEntry,W5_ODD,LOOP_UNROLL_COUNT ! 1315: UNROLL_LOOP_ENTRY_TABLE pfnDraw6WideEvenEntry,W6_EVEN,LOOP_UNROLL_COUNT ! 1316: UNROLL_LOOP_ENTRY_TABLE pfnDraw6WideOddEntry,W6_ODD,LOOP_UNROLL_COUNT ! 1317: UNROLL_LOOP_ENTRY_TABLE pfnDraw7WideEvenEntry,W7_EVEN,LOOP_UNROLL_COUNT ! 1318: UNROLL_LOOP_ENTRY_TABLE pfnDraw7WideOddEntry,W7_ODD,LOOP_UNROLL_COUNT ! 
        UNROLL_LOOP_ENTRY_TABLE pfnDraw8WideEvenEntry,W8_EVEN,LOOP_UNROLL_COUNT
        UNROLL_LOOP_ENTRY_TABLE pfnDraw8WideOddEntry,W8_ODD,LOOP_UNROLL_COUNT

;-----------------------------------------------------------------------;
; Unrolled 1- through 8-wide write-only drawing loops.
;
; Entry:
;       AL  = byte stored by the byte stores that use AL
;       AH  = byte stored by the byte stores that use AH
;       AX  = word stored by the word stores
;       EBX = unrolled loop count
;       EDX = offset added to EDI after each scan (EDI is not otherwise
;             advanced by these macros)
;       EDI = start offset
;
; EBX, EDI modified. All other registers preserved.

;-----------------------------------------------------------------------;
; Macro to draw one write-only byte, then advance to next scan line.

DRAW_1_WIDE_EVEN macro ENTRY_LABEL,ENTRY_INDEX
&ENTRY_LABEL&ENTRY_INDEX&:
        mov     [edi],al                ;store AL at the target byte
                                        ; NOTE(review): the original comment
                                        ; said "we always read 0xFF, so AL is
                                        ; written as-is; because we're in write
                                        ; mode 3, AL becomes the Bit Mask" --
                                        ; no read happens here; confirm
        add     edi,edx                 ;point to the next scan line
        endm    ;-----------------------------------;

; 1-wide write-only, byte taken from AL.

        align 4
draw_1_wide_even_loop proc near
        UNROLL_LOOP DRAW_1_WIDE_EVEN,W1_EVEN,LOOP_UNROLL_COUNT
        dec     ebx                     ;another unrolled block done
        jnz     draw_1_wide_even_loop

        ret

draw_1_wide_even_loop endp

; Macro to draw one write-only byte from AH, then advance to next scan line.

DRAW_1_WIDE_ODD macro ENTRY_LABEL,ENTRY_INDEX
&ENTRY_LABEL&ENTRY_INDEX&:
        mov     [edi],ah                ;store AH at the target byte (the
                                        ; original comment here was copied
                                        ; from the AL variant)
        add     edi,edx                 ;point to the next scan line
        endm    ;-----------------------------------;

; 1-wide write-only, byte taken from AH.

        align 4
draw_1_wide_odd_loop proc near
        UNROLL_LOOP DRAW_1_WIDE_ODD,W1_ODD,LOOP_UNROLL_COUNT
        dec     ebx                     ;another unrolled block done
        jnz     draw_1_wide_odd_loop

        ret

draw_1_wide_odd_loop endp

;-----------------------------------------------------------------------;
; Macro to draw two write-only bytes, then advance to next scan line.
; One word store of AX.

DRAW_2_WIDE_EVEN macro ENTRY_LABEL,ENTRY_INDEX
&ENTRY_LABEL&ENTRY_INDEX&:
        mov     [edi],ax                ;bytes 0-1
        add     edi,edx                 ;point to the next scan line
        endm    ;-----------------------------------;

; 2-wide write-only (EVEN variant).

        align 4
draw_2_wide_even_loop proc near
        UNROLL_LOOP DRAW_2_WIDE_EVEN,W2_EVEN,LOOP_UNROLL_COUNT
        dec     ebx                     ;another unrolled block done
        jnz     draw_2_wide_even_loop

        ret

draw_2_wide_even_loop endp

; Macro to draw two write-only bytes as two byte stores (AH first, then
; AL), then advance to next scan line.

DRAW_2_WIDE_ODD macro ENTRY_LABEL,ENTRY_INDEX
&ENTRY_LABEL&ENTRY_INDEX&:
        mov     [edi],ah                ;byte 0 from AH
        mov     [edi+1],al              ;byte 1 from AL
        add     edi,edx                 ;point to the next scan line
        endm    ;-----------------------------------;

; 2-wide write-only (ODD variant).

        align 4
draw_2_wide_odd_loop proc near
        UNROLL_LOOP DRAW_2_WIDE_ODD,W2_ODD,LOOP_UNROLL_COUNT
        dec     ebx                     ;another unrolled block done
        jnz     draw_2_wide_odd_loop

        ret

draw_2_wide_odd_loop endp

;-----------------------------------------------------------------------;
; Macro to draw three write-only bytes, then advance to next scan line.
; Optimized for even start address.

DRAW_3_WIDE_EVEN macro ENTRY_LABEL,ENTRY_INDEX
&ENTRY_LABEL&ENTRY_INDEX&:
        mov     [edi],ax                ;bytes 0-1
        mov     [edi+2],al              ;byte 2 from AL
        add     edi,edx                 ;point to the next scan line
        endm    ;-----------------------------------;

; 3-wide write-only, starting at an even address.

        align 4
draw_3_wide_even_loop proc near
        UNROLL_LOOP DRAW_3_WIDE_EVEN,W3_EVEN,LOOP_UNROLL_COUNT
        dec     ebx                     ;another unrolled block done
        jnz     draw_3_wide_even_loop

        ret

draw_3_wide_even_loop endp

;-----------------------------------------------------------------------;
; Macro to draw three write-only bytes, then advance to next scan line.
; Optimized for odd start address.

DRAW_3_WIDE_ODD macro ENTRY_LABEL,ENTRY_INDEX
&ENTRY_LABEL&ENTRY_INDEX&:
        mov     [edi],ah                ;byte 0 from AH
        mov     [edi+1],ax              ;bytes 1-2
        add     edi,edx                 ;point to the next scan line
        endm    ;-----------------------------------;

; 3-wide write-only, starting at an odd address.

        align 4
draw_3_wide_odd_loop proc near
        UNROLL_LOOP DRAW_3_WIDE_ODD,W3_ODD,LOOP_UNROLL_COUNT
        dec     ebx                     ;another unrolled block done
        jnz     draw_3_wide_odd_loop

        ret

draw_3_wide_odd_loop endp


;-----------------------------------------------------------------------;
; Macro to draw four write-only bytes, then advance to next scan line.
; Optimized for even start address.

DRAW_4_WIDE_EVEN macro ENTRY_LABEL,ENTRY_INDEX
&ENTRY_LABEL&ENTRY_INDEX&:
        mov     [edi],ax                ;bytes 0-1
        mov     [edi+2],ax              ;bytes 2-3
        add     edi,edx                 ;point to the next scan line
        endm    ;-----------------------------------;

; 4-wide write-only, starting at an even address.

        align 4
draw_4_wide_even_loop proc near
        UNROLL_LOOP DRAW_4_WIDE_EVEN,W4_EVEN,LOOP_UNROLL_COUNT
        dec     ebx                     ;another unrolled block done
        jnz     draw_4_wide_even_loop

        ret

draw_4_wide_even_loop endp

;-----------------------------------------------------------------------;
; Macro to draw four write-only bytes, then advance to next scan line.
; Optimized for odd start address.

DRAW_4_WIDE_ODD macro ENTRY_LABEL,ENTRY_INDEX
&ENTRY_LABEL&ENTRY_INDEX&:
        mov     [edi],ah                ;byte 0 from AH
        mov     [edi+1],ax              ;bytes 1-2
        mov     [edi+3],al              ;byte 3 from AL
        add     edi,edx                 ;point to the next scan line
        endm    ;-----------------------------------;

; 4-wide write-only, starting at an odd address.
; Runs the DRAW_4_WIDE_ODD body LOOP_UNROLL_COUNT times per pass; EBX is
; the number of passes.

        align 4
draw_4_wide_odd_loop proc near
        UNROLL_LOOP DRAW_4_WIDE_ODD,W4_ODD,LOOP_UNROLL_COUNT
        dec     ebx                     ;another unrolled block done
        jnz     draw_4_wide_odd_loop

        ret

draw_4_wide_odd_loop endp

;-----------------------------------------------------------------------;
; Macro to draw five write-only bytes, then advance to next scan line.
; Optimized for even start address.

DRAW_5_WIDE_EVEN macro ENTRY_LABEL,ENTRY_INDEX
&ENTRY_LABEL&ENTRY_INDEX&:
        mov     [edi],ax                ;bytes 0-1
        mov     [edi+2],ax              ;bytes 2-3
        mov     [edi+4],al              ;byte 4 from AL
        add     edi,edx                 ;point to the next scan line
        endm    ;-----------------------------------;

; 5-wide write-only, starting at an even address.

        align 4
draw_5_wide_even_loop proc near
        UNROLL_LOOP DRAW_5_WIDE_EVEN,W5_EVEN,LOOP_UNROLL_COUNT
        dec     ebx                     ;another unrolled block done
        jnz     draw_5_wide_even_loop

        ret

draw_5_wide_even_loop endp

;-----------------------------------------------------------------------;
; Macro to draw five write-only bytes, then advance to next scan line.
; Optimized for odd start address.

DRAW_5_WIDE_ODD macro ENTRY_LABEL,ENTRY_INDEX
&ENTRY_LABEL&ENTRY_INDEX&:
        mov     [edi],ah                ;byte 0 from AH
        mov     [edi+1],ax              ;bytes 1-2
        mov     [edi+3],ax              ;bytes 3-4
        add     edi,edx                 ;point to the next scan line
        endm    ;-----------------------------------;

; 5-wide write-only, starting at an odd address.

        align 4
draw_5_wide_odd_loop proc near
        UNROLL_LOOP DRAW_5_WIDE_ODD,W5_ODD,LOOP_UNROLL_COUNT
        dec     ebx                     ;another unrolled block done
        jnz     draw_5_wide_odd_loop

        ret

draw_5_wide_odd_loop endp

;-----------------------------------------------------------------------;
; Macro to draw six write-only bytes, then advance to next scan line.
; Optimized for even start address.

DRAW_6_WIDE_EVEN macro ENTRY_LABEL,ENTRY_INDEX
&ENTRY_LABEL&ENTRY_INDEX&:
        mov     [edi],ax                ;bytes 0-1
        mov     [edi+2],ax              ;bytes 2-3
        mov     [edi+4],ax              ;bytes 4-5
        add     edi,edx                 ;point to the next scan line
        endm    ;-----------------------------------;

; 6-wide write-only, starting at an even address.

        align 4
draw_6_wide_even_loop proc near
        UNROLL_LOOP DRAW_6_WIDE_EVEN,W6_EVEN,LOOP_UNROLL_COUNT
        dec     ebx                     ;another unrolled block done
        jnz     draw_6_wide_even_loop

        ret

draw_6_wide_even_loop endp

;-----------------------------------------------------------------------;
; Macro to draw six write-only bytes, then advance to next scan line.
; Optimized for odd start address.

DRAW_6_WIDE_ODD macro ENTRY_LABEL,ENTRY_INDEX
&ENTRY_LABEL&ENTRY_INDEX&:
        mov     [edi],ah                ;byte 0 from AH
        mov     [edi+1],ax              ;bytes 1-2
        mov     [edi+3],ax              ;bytes 3-4
        mov     [edi+5],al              ;byte 5 from AL
        add     edi,edx                 ;point to the next scan line
        endm    ;-----------------------------------;

; 6-wide write-only, starting at an odd address.

        align 4
draw_6_wide_odd_loop proc near
        UNROLL_LOOP DRAW_6_WIDE_ODD,W6_ODD,LOOP_UNROLL_COUNT
        dec     ebx                     ;another unrolled block done
        jnz     draw_6_wide_odd_loop

        ret

draw_6_wide_odd_loop endp

;-----------------------------------------------------------------------;
; Macro to draw seven write-only bytes, then advance to next scan line.
; Optimized for even start address.

DRAW_7_WIDE_EVEN macro ENTRY_LABEL,ENTRY_INDEX
&ENTRY_LABEL&ENTRY_INDEX&:
        mov     [edi],ax                ;bytes 0-1
        mov     [edi+2],ax              ;bytes 2-3
        mov     [edi+4],ax              ;bytes 4-5
        mov     [edi+6],al              ;byte 6 from AL
        add     edi,edx                 ;point to the next scan line
        endm    ;-----------------------------------;

; 7-wide write-only, starting at an even address.
; Runs the DRAW_7_WIDE_EVEN body LOOP_UNROLL_COUNT times per pass; EBX is
; the number of passes.

        align 4
draw_7_wide_even_loop proc near
        UNROLL_LOOP DRAW_7_WIDE_EVEN,W7_EVEN,LOOP_UNROLL_COUNT
        dec     ebx                     ;another unrolled block done
        jnz     draw_7_wide_even_loop

        ret

draw_7_wide_even_loop endp

;-----------------------------------------------------------------------;
; Macro to draw seven write-only bytes, then advance to next scan line.
; Optimized for odd start address.

DRAW_7_WIDE_ODD macro ENTRY_LABEL,ENTRY_INDEX
&ENTRY_LABEL&ENTRY_INDEX&:
        mov     [edi],ah                ;byte 0 from AH
        mov     [edi+1],ax              ;bytes 1-2
        mov     [edi+3],ax              ;bytes 3-4
        mov     [edi+5],ax              ;bytes 5-6
        add     edi,edx                 ;point to the next scan line
        endm    ;-----------------------------------;

; 7-wide write-only, starting at an odd address.

        align 4
draw_7_wide_odd_loop proc near
        UNROLL_LOOP DRAW_7_WIDE_ODD,W7_ODD,LOOP_UNROLL_COUNT
        dec     ebx                     ;another unrolled block done
        jnz     draw_7_wide_odd_loop

        ret

draw_7_wide_odd_loop endp

;-----------------------------------------------------------------------;
; Macro to draw eight write-only bytes, then advance to next scan line.
; Optimized for even start address.

DRAW_8_WIDE_EVEN macro ENTRY_LABEL,ENTRY_INDEX
&ENTRY_LABEL&ENTRY_INDEX&:
        mov     [edi],ax                ;bytes 0-1
        mov     [edi+2],ax              ;bytes 2-3
        mov     [edi+4],ax              ;bytes 4-5
        mov     [edi+6],ax              ;bytes 6-7
        add     edi,edx                 ;point to the next scan line
        endm    ;-----------------------------------;

; 8-wide write-only, starting at an even address.

        align 4
draw_8_wide_even_loop proc near
        UNROLL_LOOP DRAW_8_WIDE_EVEN,W8_EVEN,LOOP_UNROLL_COUNT
        dec     ebx                     ;another unrolled block done
        jnz     draw_8_wide_even_loop

        ret

draw_8_wide_even_loop endp

;-----------------------------------------------------------------------;
; Macro to draw eight write-only bytes, then advance to next scan line.
; Optimized for odd start address.

DRAW_8_WIDE_ODD macro ENTRY_LABEL,ENTRY_INDEX
&ENTRY_LABEL&ENTRY_INDEX&:
        mov     [edi],ah                ;byte 0 from AH
        mov     [edi+1],ax              ;bytes 1-2
        mov     [edi+3],ax              ;bytes 3-4
        mov     [edi+5],ax              ;bytes 5-6
        mov     [edi+7],al              ;byte 7 from AL
        add     edi,edx                 ;point to the next scan line
        endm    ;-----------------------------------;

; 8-wide write-only, starting at an odd address.

        align 4
draw_8_wide_odd_loop proc near
        UNROLL_LOOP DRAW_8_WIDE_ODD,W8_ODD,LOOP_UNROLL_COUNT
        dec     ebx                     ;another unrolled block done
        jnz     draw_8_wide_odd_loop

        ret

draw_8_wide_odd_loop endp

;-----------------------------------------------------------------------;
; Unrolled 1-wide read before write drawing loop (used for edges by
; do_edge_wes_trick through pfnDraw1WideEntry).
;
; Entry:
;       AL  = byte to write
;       EBX = unrolled loop count
;       ECX = offset added to EDI after each byte
;       EDI = start offset
;
; EBX, EDI and DH modified (DH receives the byte read back from the
; destination). All other registers preserved.

        UNROLL_LOOP_ENTRY_TABLE pfnDraw1WideEntry,RW1,LOOP_UNROLL_COUNT

;-----------------------------------------------------------------------;
; Macro to draw one read before write byte, then advance to next scan line.

DRAW_1_WIDE macro ENTRY_LABEL,ENTRY_INDEX
&ENTRY_LABEL&ENTRY_INDEX&:
        mov     dh,[edi]                ;load latches w/o destroying our data
        mov     [edi],al                ;write out our byte
        add     edi,ecx                 ;move to the next blind
        endm    ;-----------------------------------;

; 1-wide read/write.

        align 4
draw_1_wide_loop proc near
        UNROLL_LOOP DRAW_1_WIDE,RW1,LOOP_UNROLL_COUNT
        dec     ebx                     ;another unrolled block done
        jnz     draw_1_wide_loop

        ret

draw_1_wide_loop endp

; Tables of entry points into unrolled 1-, 2-, 3-, and 4-wide, and 5-or-wider
; read before write loops.

        UNROLL_LOOP_ENTRY_TABLE pfnDraw1RWEntry,RWW1,LOOP_UNROLL_COUNT
        UNROLL_LOOP_ENTRY_TABLE pfnDraw2RWEntry,RWW2,LOOP_UNROLL_COUNT
        UNROLL_LOOP_ENTRY_TABLE pfnDraw3RWEntry,RWW3,LOOP_UNROLL_COUNT
        UNROLL_LOOP_ENTRY_TABLE pfnDraw4RWEntry,RWW4,LOOP_UNROLL_COUNT
        UNROLL_LOOP_ENTRY_TABLE pfnDrawRWWideEntry,RWWIDE,LOOP_UNROLL_COUNT


;-----------------------------------------------------------------------;
; Unrolled 1-, 2-, 3-, and 4-wide read before write drawing loops.
;
; Entry:
;       AL  = byte to write
;       EBX = unrolled loop count
;       EDX = offset added to EDI after each scan (EDI is not otherwise
;             advanced by these macros)
;       EDI = start offset
;
; EBX, EDI and AH modified (AH receives each byte read back from the
; destination before it is overwritten). All other registers preserved.

;-----------------------------------------------------------------------;
; Macro to draw one read before write byte, then advance to next scan line.

DRAW_1_WIDE_RW macro ENTRY_LABEL,ENTRY_INDEX
&ENTRY_LABEL&ENTRY_INDEX&:
        mov     ah,[edi]                ;read before write, byte 0
        mov     [edi],al
        add     edi,edx                 ;point to the next scan line
        endm    ;-----------------------------------;

; 1-wide read/write.

        align 4
draw_1_wide_rop_loop proc near
        UNROLL_LOOP DRAW_1_WIDE_RW,RWW1,LOOP_UNROLL_COUNT
        dec     ebx                     ;another unrolled block done
        jnz     draw_1_wide_rop_loop

        ret

draw_1_wide_rop_loop endp

;-----------------------------------------------------------------------;
; Macro to draw two read before write bytes, then advance to next scan line.

DRAW_2_WIDE_RW macro ENTRY_LABEL,ENTRY_INDEX
&ENTRY_LABEL&ENTRY_INDEX&:
        mov     ah,[edi]                ;read before write, byte 0
        mov     [edi],al
        mov     ah,[edi+1]              ;read before write, byte 1
        mov     [edi+1],al
        add     edi,edx                 ;point to the next scan line
        endm    ;-----------------------------------;

; 2-wide read/write.

        align 4
draw_2_wide_rop_loop proc near
        UNROLL_LOOP DRAW_2_WIDE_RW,RWW2,LOOP_UNROLL_COUNT
        dec     ebx                     ;another unrolled block done
        jnz     draw_2_wide_rop_loop

        ret

draw_2_wide_rop_loop endp

;-----------------------------------------------------------------------;
; Macro to draw three read before write bytes, then advance to next scan line.

DRAW_3_WIDE_RW macro ENTRY_LABEL,ENTRY_INDEX
&ENTRY_LABEL&ENTRY_INDEX&:
        mov     ah,[edi]                ;read before write, byte 0
        mov     [edi],al
        mov     ah,[edi+1]              ;read before write, byte 1
        mov     [edi+1],al
        mov     ah,[edi+2]              ;read before write, byte 2
        mov     [edi+2],al
        add     edi,edx                 ;point to the next scan line
        endm    ;-----------------------------------;

; 3-wide read/write.

        align 4
draw_3_wide_rop_loop proc near
        UNROLL_LOOP DRAW_3_WIDE_RW,RWW3,LOOP_UNROLL_COUNT
        dec     ebx                     ;another unrolled block done
        jnz     draw_3_wide_rop_loop

        ret

draw_3_wide_rop_loop endp

;-----------------------------------------------------------------------;
; Macro to draw four read before write bytes, then advance to next scan line.

DRAW_4_WIDE_RW macro ENTRY_LABEL,ENTRY_INDEX
&ENTRY_LABEL&ENTRY_INDEX&:
        mov     ah,[edi]                ;read before write, byte 0
        mov     [edi],al
        mov     ah,[edi+1]              ;read before write, byte 1
        mov     [edi+1],al
        mov     ah,[edi+2]              ;read before write, byte 2
        mov     [edi+2],al
        mov     ah,[edi+3]              ;read before write, byte 3
        mov     [edi+3],al
        add     edi,edx                 ;point to the next scan line
        endm    ;-----------------------------------;

; 4-wide read/write.

        align 4
draw_4_wide_rop_loop proc near
        UNROLL_LOOP DRAW_4_WIDE_RW,RWW4,LOOP_UNROLL_COUNT
        dec     ebx                     ;another unrolled block done
        jnz     draw_4_wide_rop_loop

        ret

draw_4_wide_rop_loop endp

;-----------------------------------------------------------------------;
; Unrolled 5-or-wider read before write loop.
;
; Entry:
;       AL  = byte to write
;       EBX = unrolled loop count
;       EDX = offset from end of one scan line to the start of the next
;       ESI = # of bytes to fill across scan line (copied to ECX for each
;             scan; ESI itself is not changed)
;       EDI = start offset
;
; EBX, ECX, EDI and AH modified. All other registers preserved.

;-----------------------------------------------------------------------;
; Macro to draw five or more read before write bytes, then advance to
; next scan line. The byte loop tests its count after the first byte has
; been written (dec/jnz), so it handles any count of 1 or more; a count
; of 0 is NOT handled -- ECX would wrap and the loop would keep running.
; There are special-case handlers for the narrow cases.
; NOTE(review): the original comment said this "works because reads of
; display memory return 0ffh, which then becomes the Bit Mask as it's
; written in write mode 3"; here the byte read goes to AH and AL is what
; gets written, so confirm whether that remark still applies.

DRAW_WIDE_RW macro ENTRY_LABEL,ENTRY_INDEX
&ENTRY_LABEL&ENTRY_INDEX&:
        mov     ecx,esi                 ;# of bytes on this scan
@@:     mov     ah,[edi]                ;read before write
        mov     [edi],al
        inc     edi                     ;next byte
        dec     ecx
        jnz     @b                      ;until all bytes on the scan are done
        add     edi,edx                 ;point to the next scan line
        endm    ;-----------------------------------;

; 5-or-wider read/write.

        align 4
draw_wide_rop_loop proc near
        UNROLL_LOOP DRAW_WIDE_RW,RWWIDE,LOOP_UNROLL_COUNT
        dec     ebx                     ;another unrolled block done
        jnz     draw_wide_rop_loop

        ret

draw_wide_rop_loop endp

_TEXT$01 ends

        end

;masm386 -Mx -I..\..\inc -I..\..\..\inc -Id:\nt\public\sdk\inc -Di386=1 -DNT_INST=0 -DNT_UP=1 -DSTD_CALL -DDBG=1 -DDEVL=1 i386\patblt.asm,obj\i386\patblt.obj,x.lst;

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.