Annotation of ntddk/src/video/displays/vga/i386/cblt.asm, revision 1.1.1.1

1.1       root        1:         page    ,132
                      2:         title   BitBLT
                      3: ;---------------------------Module-Header------------------------------;
                      4: ; Module Name: cblt.asm
                      5: ;
                      6: ; Copyright (c) 1992 Microsoft Corporation
                      7: ;-----------------------------------------------------------------------;
                      8:         .386
                      9: 
                     10: ;!!! All the code to convert from color to mono in this file needs to
                     11: ;!!! be deleted.  We don't need to do it anymore.
                     12: 
                     13: 
                     14: 
                     15: 
                     16: ifndef  DOS_PLATFORM                ;Select the .model language type for this build
                     17:         .model  small,c             ;DOS_PLATFORM not defined: C language type
                     18: else
                     19: ifdef   STD_CALL
                     20:         .model  small,c             ;DOS_PLATFORM and STD_CALL defined: C language type
                     21: else
                     22:         .model  small,pascal        ;DOS_PLATFORM without STD_CALL: Pascal language type
                     23: endif;  STD_CALL
                     24: endif;  DOS_PLATFORM
                     25: 
                     26:         assume cs:FLAT,ds:FLAT,es:FLAT,ss:FLAT
                     27:         assume fs:nothing,gs:nothing
                     28: 
                     29:         .code
                     30: 
                     31: _TEXT$01   SEGMENT DWORD USE32 PUBLIC 'CODE'
                     32:            ASSUME  CS:FLAT, DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING
                     33: 
                     34:         .xlist
                     35:         include stdcall.inc         ; calling convention cmacros
                     36: 
                     37:         include i386\cmacFLAT.inc   ; FLATland cmacros
                     38:         include i386\display.inc    ; Display specific structures
                     39:         include i386\ppc.inc        ; Pack pel conversion structure
                     40:         include i386\bitblt.inc     ; General definitions
                     41:         include i386\ropdefs.inc    ; Rop definitions
                     42:         include i386\egavga.inc   ; EGA register definitions
                     43:         include i386\devdata.inc
                     44:         .list
                     45: 
                     46:         extrn   roptable:byte
                     47: ;-----------------------------Public-Routine----------------------------;
                     48: ; CBLT
                     49: ;
                     50: ; Compile a BLT onto the stack.
                     51: ;
                     52: ; Entry:
                     53: ;       EDI --> memory on stack to receive BLT program
                     54: ;       EBP --> fr structure
                     55: ; Returns:
                     56: ;       Nothing
                     57: ;-----------------------------------------------------------------------;
                     58: 
                     59: fr      equ     [ebp]                   ;Frame fields are addressed off EBP as fr.xxx (for consistency with other sources)
                     60: 
                     61: cProc   cblt
                     62: 
                     63:         subttl  Compile - Outer Loop
                     64:         page
                     65: 
                     66: ; If converting a packed pel format to planar format, add the code
                     67: ; to convert one source scan into planar format
                     68: 
                     69:         test    fr.ppcBlt.fb,PPC_NEEDED
                     70:         jz      no_pack_pel_conversion
                     71:         mov     al,I_MOV_EBP_DWORD_I    ;Give conversion routine access
                     72:         stosb                           ;  to conversion data
                     73:         lea     eax,fr.ppcBlt
                     74:         stosd
                     75:         mov     al,I_CALL_DISP32        ;Call the static conversion code
                     76:         stosb
                     77:         mov     eax,fr.ppcBlt.pfnConvert
                     78:         sub     eax,edi
                     79:         sub     eax,4                   ;4 for length of displacement
                     80:         stosd
                     81: no_pack_pel_conversion:
                     82: 
                     83: ; Initialize plane indicator.
                     84: 
                     85:         mov     ax,(PLANE_1*256)+I_MOV_BL_BYTE_I
                     86:         stosw
                     87: 
                     88: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                     89: ; Create the outerloop code.  The first part of this code will save
                     90: ; the scan line count register, destination pointer, and the source
                     91: ; pointer (if there is a source).
                     92: ;
                     93: ; The generated code should look like:
                     94: ;
                     95: ;       push    ecx             ;Save scan line count
                     96: ;       push    edi             ;Save destination pointer
                     97: ; <     push    esi     >       ;Save source pointer
                     98: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                     99: 
                    100:         mov     fr.pNextPlane,edi       ;Save address of next plane code
                    101:         mov     bl,fr.the_flags
                    102:         mov     ax,I_PUSH_ECX_PUSH_EDI  ;Save scan line count, destination ptr
                    103:         stosw
                    104:         test    bl,F0_SRC_PRESENT       ;Is a source needed?
                    105:         jz      cblt_2020               ;  No
                    106:         mov     al,I_PUSH_ESI           ;  Yes, save source pointer
                    107:         stosb
                    108: cblt_2020:
                    109: 
                    110:         subttl  Compile - Plane Selection
                    111:         page
                    112: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                    113: ; If the destination device is color and the display is involved in
                    114: ; the blt, then the color plane selection logic must be added in.
                    115: ; If the destination is monochrome, then no plane logic is needed.
                    116: ; Two color memory bitmaps will not cause the plane selection logic
                    117: ; to be copied.
                    118: ;
                    119: ; The generated code should look like:
                    120: ;
                    121: ; <     push    ebx     >       ;Save plane index
                    122: ; <     plane selection >       ;Select plane
                    123: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                    124: 
                    125:         test    bl,F0_DEST_IS_COLOR     ;Is the destination color?
                    126:         jz      cblt_pattern_fetch      ;  No
                    127:         mov     al,I_PUSH_EBX           ;Save plane index
                    128:         stosb
                    129:         test    bl,F0_DEST_IS_DEV+F0_SRC_IS_DEV ;Is the device involved?
                    130:         jz      cblt_pattern_fetch              ;  No
                    131: 
                    132: ; The device is involved for a color blt.  Copy the logic for selecting
                    133: ; the read/write plane
                    134: 
                    135:         mov     esi,offset FLAT:cps     ;--> plane select logic
                    136:         mov     ecx,LENGTH_CPS
                    137:         rep     movsb
                    138: 
                    139:         subttl  Compile - Pattern Fetch
                    140:         page
                    141: 
                    142: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                    143: ; Set up any pattern fetch code that might be needed.
                    144: ; The pattern code has many fixups, so it isn't taken from a
                    145: ; template.  It is just stuffed as it is created.
                    146: ;
                    147: ; Entry:  None
                    148: ;
                    149: ; Exit:   DH = pattern
                    150: ;
                    151: ; Uses:   AX,BX,CX,DH,flags
                    152: ;
                    153: ; For solid color brushes:
                    154: ;
                    155: ;     mov     dh,color            ;(emitted as: test bl,mask / setnz dh / neg dh)
                    156: ;
                    157: ; For monochrome brushes:
                    158: ;
                    159: ;     mov     ebx,12345678h       ;Load address of the brush
                    160: ;     mov     dh,7[ebx]           ;Get next brush byte
                    161: ;     mov     al,[12345678h]      ;Get brush index
                    162: ;     add     al,direction        ;Add displacement to next byte (+1/-1)
                    163: ;     and     al,00000111b        ;Keep it in range
                    164: ;     mov     [12345678h],al      ;Store displacement to next plane's bits
                    165: ;
                    166: ; For color brushes:
                    167: ;
                    168: ;     mov     ebx,12345678h       ;Load address of the brush
                    169: ;     mov     dh,7[ebx]           ;Get next brush byte
                    170: ;     mov     al,[12345678h]      ;Get brush index
                    171: ;     add     al,SIZE Pattern     ;Add displacement to next plane's bits
                    172: ;     and     al,00011111b        ;Keep it within the brush
                    173: ;     mov     [12345678h],al      ;Store displacement to next plane's bits
                    174: ;
                    175: ;     The address of the increment for the brush is saved for
                    176: ;     the plane looping logic if the destination is a three plane
                    177: ;     color device.  For a four plane color device, the AND
                    178: ;     automatically handles the wrap and no fixup is needed at
                    179: ;     the end of the plane loop.
                    180: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                    181: 
                    182: cblt_pattern_fetch:
                    183:         test    bl,F0_PAT_PRESENT       ;Is a pattern needed?
                    184:         jz      cblt_initial_byte_fetch ;  No, skip pattern code
                    185:         mov     al,fr.brush_accel       ;Solid color needs no fetch logic
                    186:         test    al,SOLID_BRUSH
                    187:         jz      cblt_nonsolid_brush
                    188:         and     al,MM_ALL
                    189:         shl     eax,16
                    190:         mov     ax,I_TEST_BL_BYTE_I
                    191:         stosd
                    192:         dec     edi                     ;Was only a three byte instruction
                    193:         mov     eax,I_SETNZ_DH
                    194:         stosd
                    195:         dec     edi                     ;Was only a three byte instruction
                    196:         mov     ax,I_NEG_DH
                    197:         stosw
                    198:         jmp     short cblt_initial_byte_fetch
                    199: 
                    200: cblt_nonsolid_brush:
                    201:         mov     al,I_MOV_EBX_DWORD_I    ;mov ebx,lpPBrush
                    202:         stosb
                    203:         mov     eax,fr.lpPBrush
                    204:         stosd
                    205:         mov     ax,I_MOV_DH_EBX_DISP8   ;mov dh,pat_row[ebx]
                    206:         stosw
                    207:         mov     edx,edi                 ;Save address of the brush index
                    208:         mov     al,fr.pat_row           ;Set initial pattern row
                    209:         mov     bh,00000111b            ;Set brush index mask
                    210:         and     al,bh                   ;Make sure it's legal at start
                    211:         stosb
                    212:         mov     al,I_MOV_AL_MEM
                    213:         stosb                           ;mov al,[xxxxxxxx]
                    214:         mov     eax,edx
                    215:         stosd
                    216:         mov     al,I_ADD_AL_BYTE_I
                    217:         mov     ah,direction            ;Set brush index
                    218:         errnz   INCREASE-1              ;Must be a 1
                    219:         errnz   DECREASE+1              ;Must be a -1
                    220: 
                    221:         test    bl,F0_COLOR_PAT         ;Color pattern required?
                    222:         jz      cblt_2060               ;  No
                    223:         mov     fr.addr_brush_index,edx ;Save address of brush index
                    224:         mov     ah,SIZE_PATTERN         ;Set increment to next plane
                    225:         mov     bh,00011111b            ;Set brush index mask
                    226: 
                    227: cblt_2060:
                    228:         stosw
                    229:         mov     ah,bh                   ;and al,BrushIndexMask
                    230:         mov     al,I_AND_AL_BYTE_I
                    231:         stosw
                    232:         mov     al,I_MOV_MEM_AL
                    233:         stosb                           ;mov [xxxxxxxx],al
                    234:         mov     eax,edx
                    235:         stosd
                    236: 
                    237: 
                    238:         subttl  Compile - Initial Byte Fetch
                    239:         page
                    240: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                    241: ; Create the initial byte code.  This may consist of one or two
                    242: ; initial fetches (if there is a source), followed by the required
                    243: ; logic action.  The code should look something like:
                    244: ;
                    245: ; BLTouterloop:
                    246: ;   <       mov     bp,mask_p   >   ;Load phase mask for entire loop
                    247: ;   <       xor     bh,bh       >   ;Clear previous unused bits
                    248: ;
                    249: ; ; Perform first byte fetch
                    250: ;
                    251: ;   <       lodsb               >   ;Get source byte
                    252: ;   <       color<==>mono munge >   ;Color <==> mono conversion
                    253: ;   <       phase alignment     >   ;Align bits as needed
                    254: ;
                    255: ; ; If an optional second fetch is needed, perform one
                    256: ;
                    257: ;   <       lodsb               >   ;Get source byte
                    258: ;   <       color to mono munge >   ;Color to mono munging
                    259: ;   <       phase alignment     >   ;Align bits as needed
                    260: ;
                    261: ;           logical action          ;Perform logical action required
                    262: ;
                    263: ;           mov     ah,[edi]        ;Get destination
                    264: ;           and     ax,cx           ;Save unaltered bits
                    265: ;           or      al,ah           ;  and mask in altered bits
                    266: ;           stosb                   ;Save the result
                    267: ;
                    268: ; The starting address of the first fetch/logical combination will be
                    269: ; saved so that the code can be copied later instead of recreating it
                    270: ; (if there are two fetches, the first fetch will not be copied)
                    271: ;
                    272: ; The length of the code up to the masking for altered/unaltered bits
                    273: ; will be saved so the code can be copied into the inner loop.
                    274: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                    275: 
                    276: cblt_initial_byte_fetch:
                    277:         xor     dx,dx
                    278:         or      dh,fr.phase_h           ;Is the phase 0? (also get the phase)
                    279:         jz      cblt_3020               ;  Yes, so no phase alignment needed
                    280:         mov     al,I_SIZE_OVERRIDE
                    281:         stosb
                    282:         mov     al,I_MOV_BP_WORD_I      ;Set up the phase mask
                    283:         stosb
                    284:         mov     ax,fr.mask_p            ;Place the mask into the instruction
                    285:         stosw
                    286:         mov     ax,I_XOR_BH_BH          ;Clear previous unused bits
                    287:         stosw
                    288: 
                    289: cblt_3020:
                    290:         mov     fr.start_fl,edi             ;Save starting address of action
                    291:         test    fr.the_flags,F0_SRC_PRESENT ;Is there a source?
                    292:         jz      cblt_4000                   ;  No, don't generate fetch code
                    293: 
                    294: 
                    295: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                    296: ; Generate the required sequence of instructions for a fetch
                    297: ; sequence.  Only the minimum code required is generated.
                    298: ;
                    299: ; The code generated will look something like the following:
                    300: ;
                    301: ; BLTfetch:
                    302: ;   <       lodsb                 > ;Get the next byte
                    303: ;   <       color munging         > ;Mono <==> color munging
                    304: ;
                    305: ; ; If the phase alignment isn't zero, then generate the minimum
                    306: ; ; phase alignment needed.  RORs or ROLs will be generated,
                    307: ; ; depending on the fastest sequence.  If the phase alignment
                    308: ; ; is zero, then no phase alignment code will be generated.
                    309: ;
                    310: ;   <       ror     al,n          > ;Rotate as needed
                    311: ;   <       mov     ah,al         > ;Mask used, unused bits
                    312: ;   <       and     ax,bp         > ;(BP) = phase mask
                    313: ;   <       or      al,bh         > ;Mask in old unused bits
                    314: ;   <       mov     bh,ah         > ;Save new unused bits
                    315: ;
                    316: ;
                    317: ; The nice thing about the above is it is possible for the fetch to
                    318: ; degenerate into a simple LODSB instruction.
                    319: ;
                    320: ; Currently:      BL = the_flags
                    321: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                    322: 
                    323: cblt_3040:
                    324:         mov     fr.moore_flags,0        ;Assume REP cannot be used
                    325:         shl     bl,1                    ;Color conversion?
                    326:         jnc     cblt_3180               ;  No, we were lucky this time
                    327:         errnz   F0_GAG_CHOKE-10000000b
                    328:         js      cblt_3100               ;Mono ==> color
                    329:         errnz   F0_COLOR_PAT-01000000b
                    330: 
                    331:         subttl  Compile - Initial Byte Fetch, Color ==> Mono
                    332:         page
                    333: 
                    334: ; !!!  Color to mono should not be needed anymore since the Engine will
                    335: ; !!! not be calling me to do it!  Let's remove this code!
                    336: 
                    337: 
                    338: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                    339: ; Generate the code to go from color to mono.  Color to mono
                    340: ; should map all colors that are background to 1's (white), and
                    341: ; all colors which aren't background to 0's (black).  If the source
                    342: ; is the display, then the color compare register will be used.
                    343: ; If the source is a memory bitmap, each byte of the plane will be
                    344: ; XORed with the color from that plane, with the results all ORed
                    345: ; together.  The final result will then be complemented, giving
                    346: ; the desired result.
                    347: ;
                    348: ; The generated code for bitmaps should look something like:
                    349: ;
                    350: ;     mov     al,next_plane[esi]            ;Get C1 byte of source
                    351: ;     mov     ah,2*next_plane[esi]          ;Get C2 byte of source
                    352: ;     xor     ax,C1BkColor+(C2BkColor*256)  ;XOR with plane's color
                    353: ;     or      ah,al                         ;OR the result
                    354: ;     mov     al,3*next_plane[esi]          ;Get C3 byte of source
                    355: ;     xor     al,C3BkColor
                    356: ;     or      ah,al
                    357: ;     lodsb                                 ;Get C0 source
                    358: ;     xor     al,C0BkColor                  ;XOR with C0BkColor
                    359: ;     or      al,ah                         ;OR with previous result
                    360: ;     not     al                            ;NOT to give 1's where background
                    361: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                    362: 
                    363: cblt_3070:
                    364:         test    bl,F0_SRC_IS_DEV SHL 1  ;If device, use color compare register
                    365:         jz      cblt_3080               ;It's a memory bitmap
                    366: 
                    367: ; We're in luck, the color compare register can be used.  Set up
                    368: ; for a color read, and use the normal mono fetch code.  Show the
                    369: ; innerloop code that the REP instruction can be used if this is
                    370: ; a source copy.
                    371: 
                    372:         mov     fr.moore_flags,F1_REP_OK
                    373:         mov     ecx,edx                 ;Save dx
                    374:         mov     ah,fr.bkColor.SPECIAL   ;Get SPECIAL byte of color
                    375:         and     ah,MM_ALL
                    376:         mov     al,GRAF_COL_COMP        ;Stuff color into compare register
                    377:         mov     dx,EGA_BASE+GRAF_ADDR
                    378:         out     dx,ax
                    379:         mov     ax,GRAF_CDC             ;Set Color Don't Care register
                    380:         out     dx,ax
                    381:         mov     ax,M_COLOR_READ SHL 8 + GRAF_MODE
                    382:         out     dx,ax
                    383:         mov     edx,ecx
                    384:         jmp     cblt_3180               ;Go generate mono fetch code
                    385: 
                    386: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                    387: ;       The source is a memory bitmap.  Generate the code to compute
                    388: ;       the result of the four planes:
                    389: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                    390: 
                    391: cblt_3080:
                    392:         mov     ax,I_MOV_AL_ESI_DISP32
                    393:         stosw
                    394:         mov     eax,fr.src.next_plane
                    395:         stosd
                    396:         mov     ebx,eax                 ;Save plane width
                    397:         mov     ax,I_MOV_AH_ESI_DISP32
                    398:         stosw
                    399:         lea     eax,[ebx*2]
                    400:         stosd
                    401:         mov     al,I_SIZE_OVERRIDE
                    402:         stosb
                    403:         mov     al,I_XOR_AX_WORD_I
                    404:         stosb
                    405:         mov     al,fr.bkColor.SPECIAL   ;get the color index byte
                    406:         mov     ah,al                   ;have the same in AH
                    407:         and     ax,(C2_BIT shl 8) or C1_BIT
                    408:         neg     al
                    409:         sbb     al,al                   ;al will be 0ffh if plane bit is 1
                    410:         neg     ah
                    411:         sbb     ah,ah                   ;ah wil be 0ffh if plane bit is 1
                    412:         stosw
                    413:         mov     ax,I_OR_AH_AL
                    414:         stosw
                    415: 
                    416:         mov     ax,I_MOV_AL_ESI_DISP32
                    417:         stosw
                    418:         lea     eax,[ebx*2][ebx]
                    419:         stosd
                    420:         mov     al,I_XOR_AL_BYTE_I
                    421:         mov     ah,fr.bkColor.SPECIAL
                    422:         and     ah,C3_BIT
                    423:         neg     ah
                    424:         sbb     ah,ah
                    425:         stosw
                    426:         mov     ax,I_OR_AH_AL
                    427:         stosw
                    428: 
                    429:         mov     ax,I_LODSB+(I_XOR_AL_BYTE_I*256)
                    430:         stosw
                    431:         mov     al,fr.bkColor.SPECIAL
                    432:         shr     al,1                    ;get C0_BIT into carry
                    433:         sbb     al,al                   ;make it 0ffh if bit was set
                    434:         .errnz C0_BIT - 00000001b
                    435:         stosb                           ;save the modified value
                    436:         errnz   pcol_C0
                    437:         mov     ax,I_OR_AL_AH
                    438:         stosw
                    439:         mov     ax,I_NOT_AL
                    440:         stosw
                    441:         jmp     cblt_3240               ;Go create logic code
                    442: 
                    443:         subttl  Compile - Initial Byte Fetch, Mono ==> Color
                    444:         page
                    445: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                    446: ; The conversion is mono to color.  Generate the code to
                    447: ; do the conversion, and generate the table which will
                    448: ; have the conversion values in it.
                    449: ;
                    450: ; When going from mono to color, 1 bits are considered to be
                    451: ; the background color, and 0 bits are considered to be the
                    452: ; foreground color.
                    453: ;
                    454: ; For each plane:
                    455: ;
                    456: ;   If the foreground=background=1, then 1 can be used in
                    457: ;   place of the source.
                    458: ;
                    459: ;   If the foreground=background=0, then 0 can be used in
                    460: ;   place of the source.
                    461: ;
                    462: ;   If the foreground=0 and background=1, then the source
                    463: ;   can be used as is.
                    464: ;
                    465: ;   If the foreground=1 and background=0, then the source
                    466: ;   must be complemented before using.
                    467: ;
                    468: ;   Looks like a boolean function to me.
                    469: ;
                    470: ; An AND mask and an XOR mask will be computed for each plane,
                    471: ; based on the above.  The source will then be processed against
                    472: ; the table.  The generated code should look like
                    473: ;
                    474: ;         lodsb
                    475: ;         and     al,[xxxx]
                    476: ;         xor     al,[xxxx+1]
                    477: ;
                    478: ; The table for munging the colors as stated above should look like:
                    479: ;
                    480: ;      BackGnd   ForeGnd    Result    AND  XOR
                    481: ;         1         1         1        00   FF
                    482: ;         0         0         0        00   00
                    483: ;         1         0         S        FF   00
                    484: ;         0         1     not S        FF   FF
                    485: ;
                    486: ; From this, it can be seen that the XOR mask is the same as the
                    487: ; foreground color.  The AND mask is the XOR of the foreground
                    488: ; and the background color.  Not too hard to compute
                    489: ;
                    490: ; It can also be seen that if the background color is white and the
                    491: ; foreground (text) color is black, then the conversion needn't be
                    492: ; generated (it just gives the source).  This is advantageous since
                    493: ; it will allow phase-aligned source copies to use REP MOVSW.
                    494: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                    495: 
                    496: ; Check to see if the background color is white, and the
                    497: ; foreground color is black.  This can be determined by
                    498: ; looking at the accelerator flags in the physical color.
                    499: 
                    500: cblt_3100:
                    501:         mov     ah,fr.TextColor.SPECIAL
                    502:         xor     ah,MONO_BIT             ;Map black to white
                    503:         and     ah,fr.bkColor.SPECIAL   ;AND in background color
                    504:         cmp     ah,MONO_BIT+ONES_OR_ZEROS
                    505:         jne     cblt_3110               ;Not black
                    506:         mov     fr.moore_flags,F1_REP_OK+F1_NO_MUNGE ;Show reps as ok, no color munge table
                    507:         jmp     short cblt_3180         ;Normal fetch required
                    508: 
                    509: ; No way around it.  The color conversion table and code
                    510: ; must be generated.
                    511: 
                    512: cblt_3110:
                    513:         mov     cl,fr.bkColor.SPECIAL   ;Get BackGround Colors
                    514:         mov     ch,fr.TextColor.SPECIAL ;Get ForeGround Colors
                    515:         xor     cl,ch                   ;CL = AND mask bits (fore XOR back)
                    516:         shr     cl,1                    ;Carry = AND mask bit for C0
                    517:         sbb     al,al                   ;AL = 00h or 0FFh (AND mask byte)
                    518:         shr     ch,1                    ;Carry = XOR mask bit (foreground)
                    519:         sbb     ah,ah                   ;AH = 00h or 0FFh (XOR mask byte)
                    520:         mov     word ptr fr.ajM2C.(pcol_C0 * 2),ax ;AND mask low, XOR mask high
                    521:         shr     cl,1                    ;Same again for C1
                    522:         sbb     al,al
                    523:         shr     ch,1
                    524:         sbb     ah,ah
                    525:         mov     word ptr fr.ajM2C.(pcol_C1 * 2),ax
                    526:         shr     cl,1                    ;Same again for C2
                    527:         sbb     al,al
                    528:         shr     ch,1
                    529:         sbb     ah,ah
                    530:         mov     word ptr fr.ajM2C.(pcol_C2 * 2),ax
                    531:         shr     cl,1                    ;Same again for C3
                    532:         sbb     al,al
                    533:         shr     ch,1
                    534:         sbb     ah,ah
                    535:         mov     word ptr fr.ajM2C.(pcol_C3 * 2),ax
                    536:         errnz   <TextColor - bkColor - 4>
                    537: 
                    538: ;       Generate the code for munging the color as stated above.
                    539: 
                    540:         mov     ax,I_LODSB              ;Only AL is stored below
                    541:         stosb                           ;lodsb
                    542:         mov     ax,I_AND_AL_MEM         ;and al,[xxxx]
                    543:         stosw
                    544:         lea     eax,fr.ajM2C            ;  Set address of color munge
                    545:         stosd
                    546:         mov     ebx,eax                 ;  Save address
                    547:         mov     ax,I_XOR_AL_MEM         ;xor al,[xxxx]
                    548:         stosw
                    549:         lea     eax,1[ebx]              ;  Set address of XOR mask
                    550:         stosd                           ;  (byte after the AND mask)
                    551:         jmp     short cblt_3240         ;Skip the plain lodsb fetch
                    552: 
                    553: ; Just need to generate the normal fetch sequence (lodsb)
                    554: 
                    555: cblt_3180:
                    556:         mov     al,I_LODSB              ;Generate source fetch
                    557:         stosb
                    558: 
                    559:         subttl  Compile - Phase Alignment
                    560:         page
                    561: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                    562: ; Generate the phase alignment if any.
                    563: ;
                    564: ; It is assumed that AL contains the source byte
                    565: ;
                    566: ; Currently:
                    567: ;     DH = phase alignment
                    568: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                    569: 
                    570: cblt_3240:
                    571:         mov     ecx,edi                 ;end of fetch code
                    572:         sub     ecx,fr.start_fl         ;start of fetch code
                    573:         mov     fr.cFetchCode,ecx       ;save size of fetch code
                    574:         xor     ecx,ecx                 ;Might have garbage in it
                    575:         or      dh,dh                   ;Any phase alignment?
                    576:         jz      cblt_3280               ;  No, so skip alignment
                    577:         mov     cl,dh                   ;Get horizontal phase for rotating
                    578:         mov     ax,I_ROL_AL_N           ;Assume rotate left n times
                    579:         cmp     cl,5                    ;4 or less rotates?
                    580:         jc      cblt_3260               ;  Yes
                    581:         neg     cl                      ;  No, compute ROR count
                    582:         add     cl,8                    ;  (CL = 8 - phase)
                    583:         mov     ah,HIGH I_ROR_AL_N      ;  and swap in the ROR high byte
                    584:         errnz   <(LOW I_ROL_AL_N)-(LOW I_ROR_AL_N)>
                    585: 
                    586: cblt_3260:
                    587:         stosw                           ;Stuff the phase alignment rotate opcode
                    588:         mov     al,cl                   ;  then the rotate count (CL)
                    589:         stosb
                    590: 
                    591: ; Do not generate phase masking if there is only 1 src And only 1 dest byte.
                    592: ; This is not just an optimization, see comments where these flags are set.
                    593: 
                    594:         xor     ch,ch
                    595:         mov     al,fr.first_fetch
                    596:         and     al,FF_ONLY_1_SRC_BYTE or FF_ONLY_1_DEST_BYTE ;Isolate the two flags
                    597:         xor     al,FF_ONLY_1_SRC_BYTE or FF_ONLY_1_DEST_BYTE ;Zero only if both are set
                    598:         jz      cblt_3280               ;Both set, no phase masking
                    599:         mov     esi,offset FLAT:phase_align ;--> phase masking template
                    600:         mov     ecx,PHASE_ALIGN_LEN     ;Template length in bytes
                    601:         rep     movsb                   ;Copy template into the BLT
                    602: 
                    603: cblt_3280:
                    604:         test    fr.first_fetch,FF_TWO_INIT_FETCHES  ;Generate another fetch?
                    605:         jz      cblt_4000                           ;  No
                    606: 
                    607: ; A second fetch needs to be stuffed.  Copy the one just created.
                    608: 
                    609:         mov     esi,edi                 ;Get start of fetch logic
                    610:         xchg    esi,fr.start_fl         ;Set new start, get old
                    611:         mov     ecx,edi                 ;Compute how long fetch is
                    612:         sub     ecx,esi                 ;  and move the bytes
                    613:         rep     movsb                   ;Duplicate fetch now follows the first
                    614: 
                    615:         subttl  Compile - ROP Generation
                    616:         page
                    617: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                    618: ; Create the logic action code
                    619: ;
                    620: ; The given ROP will be converted into the actual code that
                    621: ; performs the ROP.
                    622: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                    623: 
                    624: ; Copy the ROP template into the BLT
                    625: 
                    626: cblt_4000:
                    627:         mov     ax,fr.operands          ;Get back rop data
                    628:         mov     bl,ah                   ;Get high byte holding the length field
                    629:         and     ebx,HIGH ROPLength      ;Isolate field, clear rest of EBX
                    630:         shr     ebx,2                   ;Field is bits 2-4, make it an index
                    631:         movzx   ecx,roptable+256[ebx] ;Get length into ecx
                    632:         errnz   ROPLength-0001110000000000b
                    633: 
                    634:         mov     ebx,eax                 ;Get offset of the template
                    635:         and     ebx,ROPOffset
                    636:         lea     esi,roptable[ebx]       ;--> the template
                    637:         rep     movsb                   ;Move the template
                    638: 
                    639: cblt_4020:
                    640:         mov     bx,ax                   ;Keep rop around
                    641:         or      ah,ah                   ;Generate a negate? (sign bit of AH)
                    642:         jns     cblt_4040               ; No
                    643:         mov     ax,I_NOT_AL             ; Yes, stuff the two byte negate
                    644:         stosw
                    645: 
                    646: public cblt_4040
                    647: cblt_4040:
                    648:         mov     fr.end_fl,edi           ;Save end of fetch/logic operation
                    649: 
                    650:         subttl  Compile - Mask And Save
                    651:         page
                    652: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                    653: ; Generate code to mask and save the result.  If the destination
                    654: ; isn't in a register, it will be loaded from ES:[DI] first.  The
                    655: ; mask operation will then be performed, and the result stored.
                    656: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                    657: 
                    658:         mov     ax,I_MOV_AH_DEST        ;mov ah,[edi]
                    659:         stosw
                    660: 
                    661:         mov     esi,offset FLAT:masked_store;Move rest of masked store template
                    662:         movsb                           ;Move size override
                    663:         movsd                           ;  then four more bytes
                    664:         movsw                           ;  and the last two (1+4+2 = 7)
                    665:         errnz   MASKED_STORE_LEN-7      ;Must be seven bytes long
                    666:         mov     ax,fr.start_mask        ;Stuff start mask into
                    667:         xchg    ah,al                   ;  the template
                    668: 
                    669:         mov     [edi][MASKED_STORE_MASK],ax ;EDI is now just past the template
                    670: 
                    671:         mov     fr.end_fls,edi          ;Save end of fetch/logic/store operation
                    672: 
                    673:         subttl  Compile - Inner Loop Generation
                    674:         page
                    675: ;-----------------------------------------------------------------------;
                    676: ; Now for the hard stuff; The inner loop (said with a "gasp!").
                    677: ;
                    678: ; If there is no innerloop, then no code will be generated
                    679: ; (now that's fast!).
                    680: ;-----------------------------------------------------------------------;
                    681: 
                    682: cblt_5000:
                    683:         mov     edx,fr.inner_loop_count ;Get the loop count (32 bits)
                    684:         or      edx,edx                 ;If the whole count is null
                    685:         jz      cblt_6000               ;  don't generate any code
                    686: 
                    687: ;!!! Since we no longer pass in the old style rops, we can't enable this code
                    688: ;!!! and should remove/alter it someday.  Besides, most of it is in special.asm
                    689: if 0                                    ;!!!
                    690: 
                    691: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                    692: ; We have something for a loop count.  If this just happens to be
                    693: ; a source copy (S) with a phase of zero, then the innerloop degenerates
                    694: ; to a repeated MOVSB instruction.  This little special case is
                    695: ; worth checking for and handling!
                    696: ;
                    697: ; Also, if this is one of the special cases {P, Pn, DDx, DDxn}, then it
                    698: ; will also be special cased since these are all pattern fills (pattern,
                    699: ; not pattern, 0, 1).
                    700: ;
                    701: ; The same code can be shared for these routines, with the exception
                    702: ; that patterns use a STOSx instruction instead of a MOVSx instruction
                    703: ; and need a value loaded in AX
                    704: ;
                    705: ; So we lied a little.  If a color conversion is going on, then the
                    706: ; REP MOVSB might not be usable.  If the F1_REP_OK flag has been set, then
                    707: ; we can use it.  The F1_REP_OK flag will be set for a mono ==> color
                    708: ; conversion where the background color is white and the foreground
                    709: ; color is black, or for a color ==> mono conversion with the screen
                    710: ; as the source (the color compare register will be used).
                    711: ;
                    712: ; For the special cases {P, Pn, DDx, DDxn}, color conversion is
                    713: ; not possible, so ignore it for them.
                    714: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                    715: 
                    716:         mov     bl,byte ptr fr.Rop      ;Get the raster op
                    717:         test    bl,EPS_INDEX            ;Can this be special cased?
                    718:         jnz     cblt_5500               ;  No
                    719:         errnz   <HIGH EPS_INDEX>
                    720:         errnz   SPEC_PARSE_STR_INDEX    ;The special case index must be 0
                    721: 
                    722:         test    bl,EPS_OFF              ;Is this a source copy
                    723:         jz      cblt_5040               ;  Yes
                    724:         errnz   <SOURCE_COPY AND 11b>   ;Offset for source copy must be 0
                    725: 
                    726: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                    727: ; We should have one of the following fill operations:
                    728: ;
                    729: ;   P       - Pattern
                    730: ;   Pn      - NOT pattern
                    731: ;   DDx     - 0 fill
                    732: ;   DDxn    - 1 fill
                    733: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                    734: 
                    735:         mov     ax,I_MOV_AL_0FFH        ;Assume this is a 0 or 1 fill
                    736:         test    bl,01h                  ;Is it 0 or 1 fill?
                    737:         jz      cblt_5020               ;  Yes, initialize AX with 0FFh
                    738:         mov     ax,I_MOV_AL_DH          ;  No,  initialize AX with pattern
                    739: 
                    740:         errnz      PAT_COPY-0000000000100001b
                    741:         errnz   NOTPAT_COPY-0000000000000001b
                    742:         errnz    FILL_BLACK-0000000001000010b
                    743:         errnz    FILL_WHITE-0000000001100010b
                    744: 
                    745: cblt_5020:
                    746:         stosw
                    747:         mov     ax,I_MOV_AH_AL
                    748:         stosw
                    749:         mov     si,I_STOSB              ;Set up for repeated code processor
                    750:         test    bl,LogPar               ;If Pn or 0, then complement pattern
                    751:         jnz     cblt_5060               ;  Is just P or 1
                    752:         errnz   <HIGH LogPar>
                    753:         mov     al,I_SIZE_OVERRIDE
                    754:         stosb
                    755:         mov     ax,I_NOT_AX             ;  Is Pn or 0, complement AX
                    756:         stosw
                    757:         jmp     short cblt_5060
                    758: 
                    759:         errnz      PAT_COPY-00100001b
                    760:         errnz   NOTPAT_COPY-00000001b
                    761:         errnz    FILL_BLACK-01000010b
                    762:         errnz    FILL_WHITE-01100010b
                    763: 
                    764: 
                    765: ; This is a source copy.  The phase must be zero for a source copy
                    766: ; to be condensed into a REP MOVSx.
                    767: 
                    768: cblt_5040:
                    769:         test    fr.phase_h,0FFh         ;Is horizontal phase zero?
                    770:         jnz     cblt_5500               ;  No, can't condense source copy
                    771:         mov     si,I_MOVSB              ;Set register for moving bytes
                    772: 
                    773: ; For a color conversion, F1_REP_OK must be set.
                    774: 
                    775:         test    fr.the_flags,F0_GAG_CHOKE   ;Color conversion?
                    776:         jz      cblt_5060                   ;  No, rep is OK to use
                    777:         test    fr.moore_flags,F1_REP_OK    ;  Yes, can we rep it?
                    778:         jz      cblt_5500                   ;    No, do it the hard way
                    779: 
                    780: 
                    781: ;-----------------------------------------------------------------------;
                    782: ; This is a source copy or pattern fill.  Process an odd byte with
                    783: ; a MOVSB or STOSB, then process the rest of the bytes with a REP
                    784: ; MOVSW or a REP STOSW.  If the REP isn't needed, leave it out.
                    785: ;
                    786: ; Don't get caught on this like I did!  If the direction of the
                    787: ; BLT is from right to left (decrementing addresses), then both
                    788: ; the source and destination pointers must be decremented by one
                    789: ; so that the next two bytes are processed, not the next byte and
                    790: ; the byte just processed.  Also, after all words have been processed,
                    791: ; the source and destination pointers must be incremented by one to
                    792: ; point to the last byte (since the last MOVSW or STOSW would have
                    793: ; decremented both pointers by 2).
                    794: ;
                    795: ; If the target machine is an 8086, then it would be well worth the
                    796: ; extra logic to align the fields on word boundaries before the MOVSxs
                    797: ; if at all possible.
                    798: ;
                    799: ; The generated code should look something like:
                    800: ;
                    801: ; WARP8:                               ;This code for moving left to right
                    802: ;         movsb                        ;Process an odd byte
                    803: ;         mov     ecx,gl_inner_loop_count/2 ;Set word count
                    804: ;         rep                          ;If a count, then repeat is needed
                    805: ;         movsw                        ;Move words until done
                    806: ;
                    807: ;
                    808: ; WARP8:                               ;This code for moving right to left
                    809: ;         movsb                        ;Process an odd byte
                    810: ;         dec     si                   ;adjust pointer for moving words
                    811: ;         dec     di
                    812: ;         mov     ecx,gl_inner_loop_count/2 ;Set word count
                    813: ;         rep                          ;If a count, then repeat is needed
                    814: ;         movsw                        ;Move words until done
                    815: ;         inc     si                   ;adjust since words were moved
                    816: ;         inc     di
                    817: ;
                    818: ;
                    819: ; Of course, if any part of the above routine isn't needed, it isn't
                    820: ; generated (i.e. the generated code might just be a single MOVSB)
                    821: ;-----------------------------------------------------------------------;
                    822: 
                    823: cblt_5060:
                    824:         shr     edx,1                   ;Byte count / 2 for words
                    825:         jnc     cblt_5080               ;  No odd byte to move
                    826:         mov     ax,si                   ;  Odd byte, move it
                    827:         stosb
                    828: 
                    829: cblt_5080:
                    830:         jz      cblt_5140               ;No more bytes to move
                    831:         xor     bx,bx                   ;Flag as stepping from left to right
                    832:         cmp     bl,fr.step_direction    ;Moving from the right to the left?
                    833:         errnz   STEPLEFT                ;  (left direction must be zero)
                    834:         jnz     cblt_5100               ;  No
                    835:         mov     ax,I_DEC_ESI_DEC_EDI    ;  Yes, decrement both pointers
                    836:         stosw
                    837:         mov     bx,I_INC_ESI_INC_EDI      ;Set up to increment the pointers later
                    838: 
                    839: cblt_5100:
                    840:         cmp     edx,1                   ;Move one word or many words?
                    841:         jz      cblt_5120               ;  Only one word
                    842:         mov     al,I_MOV_ECX_DWORD_I    ;  Many words, load count
                    843:         stosb
                    844:         mov     eax,edx                 ;  Word count is the immediate
                    845:         stosd
                    846:         mov     al,I_REP                ;a repeat instruction
                    847:         stosb
                    848: 
                    849: cblt_5120:
                    850:         mov     al,I_SIZE_OVERRIDE
                    851:         stosb
                    852:         mov     ax,si                   ;Set the word instruction
                    853:         inc     ax
                    854:         stosb
                    855:         errnz   I_MOVSW-I_MOVSB-1       ;The word form of the instruction
                    856:         errnz   I_STOSW-I_STOSB-1       ;  must be the byte form + 1
                    857: 
                    858:         or      bx,bx                   ;Need to increment the pointers?
                    859:         jz      cblt_5140               ;  No
                    860:         mov     ax,bx                   ;  Yes, increment both pointers
                    861:         stosw
                    862: 
                    863: cblt_5140:
                    864:         jmp     cblt_6000               ;Done setting up the innerloop
                    865:         page
                    866: 
                    867: endif
                    868: 
                    869: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                    870: ; There is some count for the innerloop of the BLT.  Generate the
                    871: ; required BLT. Two or four copies of the BLT will be placed on the
                    872: ; stack.   This allows the LOOP instruction at the end to be distributed
                    873: ; over two or four bytes instead of 1, saving 11 or 12 clocks for each
                    874: ; byte (for 4).  Multiply 12 clocks by ~ 16K and you save a lot of
                    875: ; clocks!
                    876: ;
                    877: ; If there are less than four (two) bytes to be BLTed, then no looping
                    878: ; instructions will be generated.  If there are more than four (two)
                    879: ; bytes, then there is the possibility of an initial jump instruction
                    880: ; to enter the loop to handle the modulo n result of the loop count.
                    881: ;
                    882: ; The innerloop code will look something like:
                    883: ;
                    884: ;   <       mov     ecx,loopcount/n> ;load count if >n innerloop bytes
                    885: ;   <       jmp     ???           > ;If a first jump is needed, do one
                    886: ;
                    887: ; BLTloop:
                    888: ;         replicate initial byte BLT code up to n times
                    889: ;
                    890: ;   <       dec ecx / jnz BLTloop > ;Loop until all bytes processed
                    891: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                    892: 
                    893: cblt_5500:
                    894:         mov     ebx,fr.end_fl           ;Compute size of the fetch code
                    895:         sub     ebx,fr.start_fl
                    896:         inc     ebx                     ;A stosb will be appended
                    897:         mov     esi,4                   ;Assume replication 4 times
                    898:         mov     cl,2                    ;  (shift count two bits left)
                    899:         cmp     ebx,32                  ;Small enough for 4 times?
                    900:         jc      cblt_5520               ;  Yes, replicate 4 times
                    901:         shr     esi,1                   ;  No,  replicate 2 times
                    902:         dec     ecx                     ;  (shift count drops to one bit)
                    903: 
                    904: cblt_5520:
                    905:         cmp     edx,esi                 ;Generate a loop? (edx = loopcount)
                    906:         jle     cblt_5540               ;  No, just copy code
                    907:         mov     al,I_MOV_ECX_DWORD_I
                    908:         stosb                           ;mov ecx,loopcount/n
                    909:         mov     eax,edx                 ;Compute loop count
                    910:         shr     eax,cl                  ;  loopcount / n
                    911:         stosd                           ;  becomes the 32-bit immediate
                    912:         shl     eax,cl                  ;See if loopcount MOD n is 0
                    913:         sub     eax,edx                 ;EAX = -(loopcount MOD n)
                    914:         jz      cblt_5540               ;Zero, no odd count to handle
                    915: 
                    916:         page
                    917: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                    918: ; There is an odd portion of bytes to be processed.  Increment
                    919: ; the loop counter for the odd pass through the loop and then
                    920: ; compute the displacement for entering the loop.
                    921: ;
                    922: ; To compute the displacement, subtract the number of odd bytes
                    923: ; from the modulus being used  (i.e. 4-3=1).  This gives the
                    924: ; number of bytes to skip over the first time through the loop.
                    925: ;
                    926: ; Multiply this by the number of bytes for a logic sequence,
                    927: ; and the result will be the displacement for the jump.
                    928: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                    929: 
                    930:         inc     dword ptr [edi][-4]     ;Not zero, adjust for partial loop
                    931:         add     eax,esi                 ;Compute where to enter the loop at
                    932:         push    edx                     ;MUL destroys EDX (the loop count)
                    933:         mul     ebx                     ;EAX = bytes of loop body to skip
                    934:         pop     edx
                    935:         mov     ecx,eax                 ;Hold displacement while AL is used
                    936:         mov     al,I_JMP_DISP32         ;Stuff jump instruction
                    937:         stosb
                    938:         mov     eax,ecx                 ;Stuff displacement for jump
                    939:         stosd
                    940: 
                    941: ;-----------------------------------------------------------------------;
                    942: ; Currently:      EDX = loop count
                    943: ;                 ESI = loop modulus
                    944: ;                 EBX = size of one logic operation
                    945: ;                 EDI --> next location in the loop
                    946: ;-----------------------------------------------------------------------;
                    947: 
                    948: cblt_5540:
                    949:         mov     ecx,ebx                 ;Set move count
                    950:         mov     ebx,edx                 ;Set maximum for move
                    951:         cmp     ebx,esi                 ;Is the max > what's left?
                    952:         jle     cblt_5560               ;  No, just use what's left
                    953:         mov     ebx,esi                 ;  Yes, copy the max (EBX = # of copies)
                    954: 
                    955: cblt_5560:
                    956:         sub     edx,esi                 ;If edx > 0, then loop logic needed
                    957:         mov     esi,fr.start_fl         ;--> fetch code to copy
                    958:         mov     eax,ecx                 ;Save a copy of fetch length
                    959:         rep     movsb                   ;Move fetch code and stuff stosb
                    960:         mov     esi,edi                   ;--> new source (and top of loop)
                    961:         sub     esi,eax
                    962:         mov     byte ptr [edi][-1],I_STOSB ;Last byte copied becomes the stosb
                    963:         dec     ebx                     ;One copy has been made
                    964:         push    edx                     ;MUL destroys EDX
                    965:         mul     ebx                     ;Compute # bytes left to move
                    966:         pop     edx
                    967:         mov     ecx,eax                 ;Set move count
                    968:         rep     movsb                   ;Move the fetches
                    969:         sub     esi,eax                 ;Restore pointer to start of loop
                    970: 
                    971:         page
                    972: 
                    973: ; The innermost BLT code has been created and needs the looping
                    974: ; logic added to it.  If there is any looping to be done, then
                    975: ; generate the loop code.  The code within the innerloop may be
                    976: ; greater than 126 bytes, so a LOOP instruction may not be used
                    977: ; in this case.
                    978: 
                    979: cblt_5580:
                    980:         or      edx,edx                 ;Need a loop?
                    981:         jle     cblt_6000               ;  No, don't generate one
                    982:         mov     al,I_DEC_ECX            ;  Yes, stuff the count decrement
                    983:         stosb
                    984:         mov     ax,I_JNZ_DISP32         ;  and the two byte JNZ opcode
                    985:         stosw
                    986:         mov     eax,esi                 ;Compute offset of loop
                    987:         sub     eax,edi                 ;  (ESI = top of loop, EDI = disp field)
                    988:         sub     eax,4                   ;Bias by DISP32
                    989:         stosd
                    990: 
                    991: 
                    992:         subttl  Compile - Last Byte Processing
                    993:         page
                    994: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                    995: ; All the innerloop stuff has been processed.  Now generate the code for
                    996: ; the final byte if there is one.  This code is almost identical to the
                    997: ; code for the first byte except there will only be one fetch (if a
                    998: ; fetch is needed at all).
                    999: ;
                   1000: ; The code generated will look something like:
                   1001: ;
                   1002: ; <       fetch           >       ;Get source byte
                   1003: ; <       align           >       ;Align source if needed
                   1004: ;         action                  ;Perform desired action
                   1005: ;         mask and store
                   1006: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                   1007: 
                   1008: cblt_6000:
                   1009:         mov     dx,fr.last_mask         ;Get last byte mask
                   1010:         or      dh,dh                   ;Is there a last byte to be processed?
                   1011:         jz      cblt_6100               ;  No.
                   1012: 
                   1013:         mov     ecx,fr.end_fls          ;Get end of fetch/logic/store operation
                   1014:         mov     esi,fr.start_fl         ;Get start of fetch/logic sequence
                   1015:         sub     ecx,esi                 ;Compute length of the code
                   1016:         test    fr.first_fetch,FF_NO_LAST_FETCH
                   1017:         jz      cblt_include_fetch
                   1018:         test    fr.the_flags,F0_SRC_PRESENT ; was there a fetch?
                   1019:         jz      cblt_was_no_fetch
                   1020:         cmp     fr.phase_h,0            ; Phase zero case is not combined
                   1021:                                         ; into innerloop as it should be.
                   1022:                                         ; If the final byte is full then we
                   1023:                                         ; better not remove the lodsb ( i.e.
                   1024:         je      cblt_include_fetch      ; 0 - 0 = 0 would make us think we could)
                   1025: 
                   1026:         mov     eax,fr.cFetchCode       ; don't copy the fetch (lodsb)
                   1027:         add     esi,eax
                   1028:         sub     ecx,eax
                   1029: 
                   1030: cblt_was_no_fetch:
                   1031: cblt_include_fetch:
                   1032: 
                   1033:         rep     movsb                       ;Copy the fetch/action/store code
                   1034:         xchg    dh,dl
                   1035:         mov     [edi][MASKED_STORE_MASK],dx ;Stuff last byte mask into the code
                   1036: skip_save:
                   1037:         subttl  Compile - Looping Logic
                   1038:         page
                   1039: 
                   1040: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                   1041: ; Looping logic.
                   1042: ;
                   1043: ; The looping logic must handle monochrome bitmaps, color bitmaps,
                   1044: ; huge bitmaps, the device, the presence or absence of a source
                   1045: ; or pattern, and mono <==> color interactions.
                   1046: ;
                   1047: ; The type of looping logic is always based on the destination.
                   1048: ;
                   1049: ; Plane Update Facts:
                   1050: ;
                   1051: ; 1)  If the destination device is color, then there will be
                   1052: ;     logic for plane selection.  Plane selection is performed
                   1053: ;     at the start of the loop for the display.  Plane selection
                   1054: ;     for bitmaps is performed at the end of the loop in anticipation
                   1055: ;     of the next plane.
                   1056: ;
                   1057: ;     The following applies when the destination is color:
                   1058: ;
                   1059: ;     a)  The destination update consists of:
                   1060: ;
                   1061: ;         1)  If the destination is the display, the next plane will
                   1062: ;             be selected by the plane selection code at the start
                   1063: ;             of the scan line loop.
                   1064: ;
                   1065: ;         2)  If not the display, then the PDevice must be a bitmap.
                   1066: ;             The next plane will be selected by updating the
                   1067: ;             destination offset by the next_plane value.
                   1068: ;
                   1069: ;
                   1070: ;     b)  If F0_GAG_CHOKE isn't specified, then there may be a source.
                   1071: ;         If there is a source, it must be color, and the update
                   1072: ;         consists of:
                   1073: ;
                   1074: ;         1)  If the source is the display, the next plane will be
                   1075: ;             selected by the plane selection code at the start of
                   1076: ;             the loop.
                   1077: ;
                   1078: ;         2)  If not the display, then the PDevice must be a bitmap.
                   1079: ;             The next plane will be selected by updating the
                   1080: ;             source offset by the next_plane value.
                   1081: ;
                   1082: ;
                   1083: ;     c)  If F0_GAG_CHOKE is specified, then the source must be a
                   1084: ;         monochrome bitmap which is undergoing mono to color
                   1085: ;         conversion.  The AND & XOR mask table which is used
                   1086: ;         for the conversion will have to be updated, unless
                   1087: ;         the F1_NO_MUNGE flag is set indicating that the color
                   1088: ;         conversion really wasn't needed.
                   1089: ;
                   1090: ;         The source's pointer will not be updated.  It will
                   1091: ;         remain pointing to the same scan of the source until
                   1092: ;         all planes of the destination have been processed.
                   1093: ;
                   1094: ;
                   1095: ;     d)  In all cases, the plane mask rotation code will be
                   1096: ;         generated.  If the plane indicator doesn't overflow,
                   1097: ;         then start at the top of the scan line loop for the
                   1098: ;         next plane.
                   1099: ;
                   1100: ;         If the plane indicator overflows, then:
                   1101: ;
                   1102: ;             1)  If there is a pattern present, it's a color
                   1103: ;                 pattern fetch.  The index of which scan of
                   1104: ;                 the brush to use will have to be updated.
                   1105: ;
                   1106: ;             2)  Enter the scan line update routine
                   1107: ;
                   1108: ;
                   1109: ; 2)      If the destination is monochrome, then there will be no
                   1110: ;         plane selection logic.
                   1111: ;
                   1112: ;         If F0_GAG_CHOKE is specified, then color ==> mono conversion
                   1113: ;         is taking place.  Any plane selection logic is internal
                   1114: ;         to the ROP byte fetch code.  Any color brush was pre-
                   1115: ;         processed into a monochrome brush, so no brush updating
                   1116: ;         need be done.
                   1117: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                   1118: 
                   1119:         subttl  Looping Logic - Plane Selection
                   1120:         page
                   1121: 
                   1122: ; Get saved parameters off of the stack.
                   1123: ;
                   1124: ; <       pop     ebx            > ;Get plane indicator
                   1125: ; <       pop     esi            > ;Get source pointer
                   1126: ;         pop     edi              ;Get destination pointer
                   1127: ;         pop     ecx              ;Get loop count
                   1128: 
                   1129: cblt_6100:
                   1130:         mov     bh,fr.the_flags         ;These flags will be used a lot
                   1131:         test    bh,F0_DEST_IS_COLOR     ;Is the destination color?
                   1132:         jz      cblt_6120               ;  No
                   1133:         mov     al,I_POP_EBX            ;Restore plane index
                   1134:         stosb
                   1135: 
                   1136: cblt_6120:
                   1137:         test    bh,F0_SRC_PRESENT       ;Is a source needed?
                   1138:         jz      cblt_6140               ;  No
                   1139:         mov     al,I_POP_ESI            ;  Yes, get source pointer
                   1140:         stosb
                   1141: 
                   1142: cblt_6140:
                   1143:         mov     ax,I_POP_EDI_POP_ECX    ;Get destination pointer
                   1144:         stosw                           ;Get loop count
                   1145:         test    bh,F0_DEST_IS_COLOR     ;Color scanline update?
                   1146:         jz      cblt_6300               ;  No, just do the mono scanline update
                   1147: 
                   1148: ; The scanline update is for color.  Generate the logic to update
                   1149: ; a brush, perform plane selection, process mono ==> color conversion,
                   1150: ; and test for plane overflow.
                   1151: 
                   1152: cblt_6160:
                   1153:         or      bh,bh                   ;Color conversion?
                   1154:         jns     cblt_6180               ;  No
                   1155:         errnz   F0_GAG_CHOKE-10000000b
                   1156: 
                   1157:         page
                   1158: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                   1159: ; The source is monochrome.  Handle mono ==> color conversion.
                   1160: ; The AND & XOR mask table will need to be rotated for the next
                   1161: ; pass over the source.
                   1162: ;
                   1163: ; The source scanline pointer will not be updated until all planes
                   1164: ; have been processed for the current scan.
                   1165: ;
                   1166: ; If F1_NO_MUNGE has been specified, then the color conversion table
                   1167: ; and the color conversion code was not generated, and no update
                   1168: ; code will be needed.
                   1169: ;
                   1170: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                   1171: 
                   1172:         test    fr.moore_flags,F1_NO_MUNGE  ;Is there really a conversion table?
                   1173:         jnz     short cblt_6200             ;  No, so skip the code
                   1174: 
                   1175:         mov     al,I_MOV_EBP_DWORD_I        ;lea ebp,fr.ajM2C
                   1176:         stosb
                   1177:         lea     eax,fr.ajM2c                ;Get address of table
                   1178:         stosd
                   1179:         mov     esi,offset FLAT:rot_and_xor ;--> rotate code
                   1180:         mov     cx,LEN_ROT_AND_XOR
                   1181:         rep     movsb
                   1182:         jmp     short cblt_6200
                   1183: 
                   1184: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                   1185: ; If there is a source, it must be color.  If it is a memory
                   1186: ; bitmap, then the next plane must be selected, else it is
                   1187: ; the display and the next plane will be selected through
                   1188: ; the hardware registers.
                   1189: ;
                   1190: ; <       add     esi,next_plane>
                   1191: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                   1192: 
                   1193: cblt_6180:
                   1194:         test    bh,F0_SRC_PRESENT       ;Is there really a source?
                   1195:         jz      cblt_6200               ;No source.
                   1196:         test    bh,F0_SRC_IS_DEV        ;Is the source the display?
                   1197:         jnz     cblt_6200               ;  Yes, use hardware plane selection
                   1198:         mov     ax,I_ADD_ESI_DWORD_I    ;  No, generate plane update
                   1199:         stosw                           ;Add si,next_plane
                   1200:         mov     eax,fr.src.next_plane
                   1201:         stosd
                   1202: 
                   1203: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                   1204: ; If the destination isn't the device, then it must be a color
                   1205: ; memory bitmap, and its pointer will have to be updated by
                   1206: ; bmWidthPlanes.  If it is the display, then the next plane
                   1207: ; will be selected through the hardware registers.
                   1208: ;
                   1209: ; <       add     edi,next_plane>
                   1210: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                   1211: 
                   1212: cblt_6200:
                   1213:         test    bh,F0_DEST_IS_DEV       ;Is the destination the display
                   1214:         jnz     cblt_6220               ;  Yes, don't generate update code
                   1215:         mov     ax,I_ADD_EDI_DWORD_I    ;  No, update bitmap to the next plane
                   1216:         stosw
                   1217:         mov     eax,fr.dest.next_plane
                   1218:         stosd
                   1219: 
                   1220: 
                   1221: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                   1222: ; The source and destination pointers have been updated.
                   1223: ; Now generate the plane looping logic.
                   1224: ;
                   1225: ; <       shl     bl,1           > ;Select next plane
                   1226: ; <       jnc     StartOfLoop    > ;  Yes, go process next
                   1227: ; <       mov     bl,PLANE_1     > ;Reset plane indicator
                   1228: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                   1229: 
                   1230: cblt_6220:
                   1231:         mov     ax,I_SHL_BL_1           ;Stuff plane looping logic
                   1232:         stosw
                   1233: 
                   1234:         mov     edx,fr.pNextPlane       ;Compute relative offset of
                   1235:         sub     edx,edi                 ;  start of loop
                   1236:         sub     edx,6                   ;Bias offset by length of jnc inst.
                   1237:         mov     ax,I_JNC_DISP32
                   1238:         stosw                           ;jnc StartOfLoop
                   1239:         mov     eax,edx
                   1240:         stosd
                   1241: 
                   1242:         subttl  Looping Logic - Color Brush Update
                   1243:         page
                   1244: 
                   1245: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                   1246: ; The plane update logic has been copied.  If a pattern was
                   1247: ; involved for a color BLT, then the pattern index will need
                   1248: ; to be updated to the next scanline for three plane mode.
                   1249: ;
                   1250: ; This will involve subtracting off 3*SIZE_PATTERN (MonoPlane),
                   1251: ; and adding in the increment.  The result must be masked with
                   1252: ; 00000111b to select the correct source.  Note that the update
                   1253: ; can be done with an add instruction and a mask operation.
                   1254: ;
                   1255: ; inc   index+MonoPlane   inc-MonoPlane   result   AND 07h
                   1256: ;
                   1257: ;  1       0+32 = 32        1-32 = -31       1         1
                   1258: ;  1       7+32 = 39        1-32 = -31       8         0
                   1259: ; -1       0+32 = 32       -1-32 = -33      FF         7
                   1260: ; -1       7+32 = 39       -1-32 = -33       6         6
                   1261: ;
                   1262: ; <       mov     al,[12345678] > ;Get brush index
                   1263: ; <       add     al,n          > ;Add displacement to next byte
                   1264: ; <       and     al,00000111b  > ;Keep it in range
                   1265: ; <       mov     [12345678],al > ;Store displacement to next byte
                   1266: ;
                   1267: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                   1268: 
                   1269:         test    bh,F0_PAT_PRESENT       ;Is a pattern involved?
                   1270:         jz      cblt_6300               ;  No
                   1271:         test    fr.brush_accel,SOLID_BRUSH
                   1272:         jnz     cblt_6300               ;Solid color fetch needs no updating
                   1273:         mov     al,I_MOV_AL_MEM
                   1274:         stosb                           ;mov al,[xxxxxxxx]
                   1275:         mov     edx,fr.addr_brush_index
                   1276:         mov     eax,edx
                   1277:         stosd
                   1278:         mov     al,I_ADD_AL_BYTE_I
                   1279:         mov     ah,fr.direction         ;add al,bais
                   1280:         sub     ah,oem_brush_mono       ;Anybody ever fly one of these things?
                   1281:         errnz   INCREASE-1              ;Must be a 1
                   1282:         errnz   DECREASE+1              ;Must be a -1
                   1283:         stosw
                   1284:         mov     ax,0700h+I_AND_AL_BYTE_I        ;and al,00000111b
                   1285:         stosw
                   1286:         mov     al,I_MOV_MEM_AL
                   1287:         stosb                           ;mov [xxxxxxxx],al
                   1288:         mov     eax,edx
                   1289:         stosd
                   1290: 
                   1291:         subttl  Looping Logic - Scan Line Update
                   1292:         page
                   1293: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                   1294: ; Generate the next scanline code.  The next scan line code must
                   1295: ; handle monochrome bitmaps, the device, the presence or absence
                   1296: ; of a source.
                   1297: ;
                   1298: ; Also color bitmaps, and mono <==> color interactions.
                   1299: ;
                   1300: ; <       add esi,gl_src.next_scan> ;Normal source scan line update
                   1301: ;         add edi,gl_dest.next_scan ;Normal destination scan line update
                   1302: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
                   1303: 
                   1304: ;!!! We have the problem in that this code assumes that cPlanes*cjBytesScan
                   1305: ;!!! is the same as next_scan.  This might not always be the case, and we
                   1306: ;!!! should do something about fixing this.  This would require pushing an
                   1307: ;!!! extra copy of pScan_n_Plane0 and then adding next_scan to this when we
                   1308: ;!!! have exhausted the planes for scan n.
                   1309: 
                   1310: cblt_6300:
                   1311:         test    bh,F0_SRC_PRESENT       ;Is there a source?
                   1312:         jz      cblt_6340               ;  No, skip source processing
                   1313:         mov     ax,I_ADD_ESI_DWORD_I    ;add esi,increment
                   1314:         stosw
                   1315:         mov     eax,fr.src.next_scan
                   1316:         stosd
                   1317: 
                   1318: cblt_6340:
                   1319:         mov     ax,I_ADD_EDI_DWORD_I    ;add edi,increment
                   1320:         stosw
                   1321:         mov     eax,fr.dest.next_scan
                   1322:         stosd
                   1323: 
                   1324: ; Compile the scan line loop.  The code simply jumps to the start
                   1325: ; of the outer loop if more scans exist to be processed.
                   1326: 
                   1327: cblt_6380:
                   1328:         mov     al,I_DEC_ECX
                   1329:         stosb
                   1330:         mov     ax,I_JNZ_DISP32
                   1331:         stosw
                   1332:         mov     eax,fr.blt_addr         ;Compute relative offset of
                   1333:         sub     eax,edi                 ;  start of loop
                   1334:         sub     eax,4                   ;Adjust jump bias for DISP32
                   1335:         stosd                           ;  and store it into jump
                   1336: 
                   1337: cblt_6420:
                   1338:         mov     al,I_RET                ;Stuff the far return instruction
                   1339:         stosb
                   1340: 
                   1341:      cRet    cblt
                   1342: endProc cblt
                   1343: 
                   1344: _TEXT$01   ends
                   1345: 
                   1346:         end
                   1347: 

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.