|
|
1.1 root 1: page ,132
2: title BitBLT
3: ;---------------------------Module-Header------------------------------;
4: ; Module Name: blt.asm
5: ;
6: ; Copyright (c) 1992 Microsoft Corporation
7: ;-----------------------------------------------------------------------;
8: .386
9:
10: ifndef DOS_PLATFORM
11: .model small,c
12: else
13: ifdef STD_CALL
14: .model small,c
15: else
16: .model small,pascal
17: endif; STD_CALL
18: endif; DOS_PLATFORM
19:
20: assume cs:FLAT,ds:FLAT,es:FLAT,ss:FLAT
21: assume fs:nothing,gs:nothing
22:
23: .code
24:
25: _TEXT$02 SEGMENT DWORD USE32 PUBLIC 'CODE'
26: ASSUME CS:FLAT, DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING
27:
28: .xlist
29: include stdcall.inc ; calling convention cmacros
30:
31: include i386\cmacFLAT.inc ; FLATland cmacros
32: include i386\display.inc ; Display specific structures
33: include i386\ppc.inc ; Pack pel conversion structure
34: include i386\bitblt.inc ; General definitions
35: include i386\ropdefs.inc ; Rop definitions
36: include i386\roptable.inc ; Raster op tables
37: include i386\egavga.inc ; EGA register definitions
38: include i386\strucs.inc ; Structure definitions
39: .list
40:
41:
42: EXTRNP cblt
43: EXTRNP check_device_special_cases
44: EXTRNP packed_pel_comp_y
45: EXTRNP vDIB4Preprocess
46: EXTRNP vDIB8Preprocess
47:
48: ; The following two bitmask tables are used for fetching
49: ; the first and last byte used-bits bitmask.
50:
51: public bitmask_tbl1
52: bitmask_tbl1 label byte ;Indexed by (left edge X) MOD 8
53: db 11111111b ;Masks for leftmost byte (index 0)
54: db 01111111b ;index 1
55: db 00111111b ;index 2
56: db 00011111b ;index 3
57: db 00001111b ;index 4
58: db 00000111b ;index 5
59: db 00000011b ;index 6
60: db 00000001b ;index 7
61:
62: public bitmask_tbl2
63: bitmask_tbl2 label byte ;Indexed by (left edge X MOD 8 + inclusive extent) MOD 8
64: db 10000000b ;Masks for rightmost byte (index 0)
65: db 11000000b ;index 1
66: db 11100000b ;index 2
67: db 11110000b ;index 3
68: db 11111000b ;index 4
69: db 11111100b ;index 5
70: db 11111110b ;index 6
71: db 11111111b ;index 7
72:
73: ; phase_tbl1 is used for loading the "used" bits and "saved" bits
74: ; bitmasks for cases 1,2,3 where the step direction is left to
75: ; right. If it weren't for the case of zero, this could be done
76: ; with a simple rotate of 00FF. For cases 4,5,6, a simple rotate
77: ; can create the mask needed.
78: ; Each entry is one word, indexed by the horizontal phase (0-7).
79: public phase_tbl1
80: phase_tbl1 label word ;Loaded as a word: low byte = used bits, high byte = saved bits
81: db 11111111b,00000000b ;Used bits, saved bits (phase 0)
82: db 00000001b,11111110b ;phase 1
83: db 00000011b,11111100b ;phase 2
84: db 00000111b,11111000b ;phase 3
85: db 00001111b,11110000b ;phase 4
86: db 00011111b,11100000b ;phase 5
87: db 00111111b,11000000b ;phase 6
88: db 01111111b,10000000b ;phase 7
89:
90: ProcName xxxvCompiledBlt,vCompiledBlt,56
91: ; vCompiledBlt: copies its arguments into the local FRAME, decodes the ROP, gathers surface data via copy_dev, computes the X and Y phase parameters, then (unless check_device_special_cases returns carry) has cblt compile a BLT onto the stack and calls it. Returns EAX = 1.
92: xxxvCompiledBlt proc uses esi edi ebx,\
93: __pdsurfDst :ptr, \
94: __DestxOrg :dword, \
95: __DestyOrg :dword, \
96: __pdsurfSrc :ptr, \
97: __SrcxOrg :dword, \
98: __SrcyOrg :dword, \
99: __xExt :dword, \
100: __yExt :dword, \
101: __Rop :dword, \
102: __lpPBrush :ptr, \
103: __bkColor :dword, \
104: __TextColor :dword, \
105: __pulXlateVec :dword, \
106: __pptlBrush :ptr
107: ; Note: the origins and extents are dword arguments but only their low words are stored in the frame below.
108: local fr[SIZE FRAME]:byte
109:
110: cld ;Let's make no assumptions about this!
111:
112: ; We be hacking, so copy all the parameters into the local frame right now,
113: ; so that the subroutines will have access to them.
114:
115: mov eax,__pdsurfDst
116: mov fr.pdsurfDst,eax
117: mov eax,__DestxOrg
118: mov fr.DestxOrg,ax ;Low word only
119: mov eax,__DestyOrg
120: mov fr.DestyOrg,ax ;Low word only
121: mov eax,__pdsurfSrc
122: mov fr.pdsurfSrc,eax
123: mov eax,__SrcxOrg
124: mov fr.SrcxOrg,ax ;Low word only
125: mov eax,__SrcyOrg
126: mov fr.SrcyOrg,ax ;Low word only
127: mov eax,__xExt
128: mov fr.xExt,ax ;Low word only
129: mov eax,__yExt
130: mov fr.yExt,ax ;Low word only
131: mov eax,__Rop
132: mov fr.Rop,eax
133: mov eax,__lpPBrush
134: mov fr.lpPBrush,eax
135: mov eax,__bkColor
136: mov fr.bkColor,eax
137: mov eax,__TextColor
138: mov fr.TextColor,eax
139: mov eax,__pulXlateVec
140: mov fr.ppcBlt.pulXlate,eax
141: mov eax,__pptlBrush
142: mov fr.pptlBrush,eax
143:
144: subttl ROP Preprocessing
145: page
146:
147: ;-----------------------------------------------------------------------;
148: ; Get the encoded raster operation, and map the raster op if needed.
149: ;
150: ; To map the ROPS 80h through FFh to 00h through 7Fh, take the
151: ; 1's complement of the ROP, and invert the "negate needed" flag.
152: ;-----------------------------------------------------------------------;
153:
154: xor ax,ax ;Assume not 80h : FFh
155: mov bl,byte ptr fr.Rop
156: or bl,bl ;Is this in the second half (80-FF)?
157: jns parse_10 ; No, rop index is correct
158: not bl ; Yes, want the inverse
159: mov ah,HIGH NEGATE_NEEDED ;Want to invert the not flag
160: errnz <LOW NEGATE_NEEDED>
161:
162: parse_10:
163: movzx ebx,bl ;EBX = rop index (00h-7Fh)
164: xor ax,roptable[ebx*2] ;Get ROP, maybe toggle negate flag
165: mov fr.operands,ax ;Save the encoded raster operation
166:
167: mov bl,ah ;Set fr.the_flags for source and pattern
168: and bl,HIGH (SOURCE_PRESENT+PATTERN_PRESENT)
169: ror bl,1 ;Line up with F0_SRC_PRESENT and F0_PAT_PRESENT
170:
171: errnz <SOURCE_PRESENT - 0010000000000000b>
172: errnz <PATTERN_PRESENT - 0100000000000000b>
173: errnz <F0_SRC_PRESENT - 00010000b>
174: errnz <F0_PAT_PRESENT - 00100000b>
175:
176: parse_end:
177:
178:
179: ;-----------------------------------------------------------------------;
180: ; pdevice_processing
181: ;
182: ; Check the required bitmaps for validity, get their parameters
183: ; and store the information locally.
184: ;
185: ; BL = Initial fr.the_flags
186: ; F0_SRC_PRESENT set if source needed
187: ; F0_PAT_PRESENT set if pattern needed
188: ;-----------------------------------------------------------------------;
189:
190: xor bh,bh ;BH = real fr.the_flags
191: mov fr.ppcBlt.fb,bh ;No packed pel conversion
192: test bl,F0_SRC_PRESENT ;Is a source needed?
193: jz pdevice_decode_dest ; No, skip source validation
194: mov esi,fr.pdsurfSrc ;Get pointer to source
195: lea edi,fr.src ;--> where parameters will go
196: push ebp ;Save frame pointer
197: lea ebp,fr.ppcBlt ;copy_dev wants EBP --> PPC structure
198: cCall copy_dev ;Get all the data
199: pop ebp
200: test fr.ppcBlt.fb,PPC_NEEDED ;Will we be converting from packed pel?
201: jz pdevice_decode_dest ; No
202: MovAddr eax,vDIB8Preprocess,0
203: cmp fr.ppcBlt.iFormat,BMF_8BPP
204: je short @F
205: MovAddr eax,vDIB4Preprocess,0
206: @@:
207: push ebp ; Yes, do serious messing around
208: lea ebp,fr ; Needs a frame pointer
209: call eax
210: pop ebp
211:
212: pdevice_decode_dest:
213: mov esi,fr.pdsurfDst ;Get pointer to destination
214: lea edi,fr.dest ;--> where parameters will go
215: cCall copy_dev ;Get all the data
216: ; NOTE(review): EBP is not repointed at fr.ppcBlt for this call; copy_dev stores through EBP only for formats other than BMF_1BPP, BMF_DEVICE and BMF_PHYSDEVICE - presumably the destination never has such a format; confirm.
217: ; The pattern fetch code will be based on the color format of the
218: ; destination. If the destination is mono, then a mono fetch will be
219: ; performed. If the destination is color, then a color fetch will be
220: ; performed.
221:
222: or bh,bl ;Merge in F0_SRC_PRESENT, F0_PAT_PRESENT
223: test bh,F0_DEST_IS_COLOR ;Show color pattern needed if
224: jz pdevice_chk_color_conv ; destination is color
225: or bh,F0_COLOR_PAT
226:
227: ; Check for color conversion. If so, then set F0_GAG_CHOKE.
228: ; Color conversion will exist if the source and destination are of
229: ; different color formats.
230:
231: pdevice_chk_color_conv:
232: test bh,F0_SRC_PRESENT ;Is there a source?
233: jz pdevice_set_dest_flag ; No, cannot be converting.
234: mov al,bh
235: and al,F0_SRC_IS_COLOR+F0_DEST_IS_COLOR
236: jz pdevice_set_src_flag ;Both are monochrome
237: xor al,F0_SRC_IS_COLOR+F0_DEST_IS_COLOR
238: jz pdevice_set_src_flag ;Both are color
239: or bh,F0_GAG_CHOKE ;Mono ==> color or color ==> mono
240: mov al,fr.bkColor.SPECIAL
241: mov ah,fr.TextColor.SPECIAL
242: errnz C0_BIT+C1_BIT+C2_BIT+C3_BIT-0Fh
243: and ax,0F0Fh ;Keep only the C0-C3 bits of each color
244: mov fr.both_colors,ax
245:
246: ; Setup the scan line update flag in the source device structure.
247: ; The source will use a monochrome style update if it is the display,
248: ; it is monochrome, or it is color and the destination device is
249: ; monochrome.
250:
251: pdevice_set_src_flag:
252: mov al,bh ;Set 'Z' if to use color update
253: and al,F0_SRC_IS_DEV+F0_SRC_IS_COLOR+F0_DEST_IS_COLOR
254: xor al,F0_SRC_IS_COLOR+F0_DEST_IS_COLOR
255: jnz pdevice_set_dest_flag ;Use the mono update
256: or fr.src.dev_flags,COLOR_UP;Show color scan update
257:
258: ; Setup the scan line update flag in the destination device
259: ; structure. The destination will use a monochrome update
260: ; if it is monochrome or the display. It will use a color
261: ; update if it is a color bitmap.
262:
263: pdevice_set_dest_flag:
264: mov al,bh ;Set 'Z' if to use color destination
265: and al,F0_DEST_IS_DEV+F0_DEST_IS_COLOR; update code
266: xor al,F0_DEST_IS_COLOR
267: jnz pdevice_proc_end ;Mono update
268: or fr.dest.dev_flags,COLOR_UP;Show color scan update
269: pdevice_proc_end:
270:
271:
272: push ebp ;Set up pointer to frame variables
273: lea ebp,fr
274: cCall pattern_preprocessing
275: pop ebp
276: ; NOTE(review): fr.the_flags is read below but is never stored in the code visible in this procedure; presumably pattern_preprocessing saves BH there - confirm.
277: subttl Phase Processing (X)
278: page
279:
280: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
281: ; Now the real work comes along: In which direction will the
282: ; copy be done? Refer to the 10 possible types of overlap that
283: ; can occur (10 cases, 4 resulting types of action required).
284: ;
285: ; If there is no source bitmap involved in this particular BLT,
286: ; then the path followed must allow for this. This is done by
287: ; setting both the destination and source parameters equal.
288: ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
289:
290: phase_processing:
291: phase_processing_x:
292: mov dx,fr.xExt ;Get X extent
293: dec dx ;Make X extent inclusive
294: mov bx,fr.DestxOrg ;Get destination X origin
295: mov di,bx
296: and bx,00000111b ;Get offset of destination within byte
297:
298: ; If there is no source, then just use the pointer to the destination
299: ; bitmap and load the same parameters, which will cause the "equality"
300: ; path to be followed in the set-up code. This path is the favored
301: ; path for the case of no source bitmap.
302:
303: mov ax,di ;Assume no source needed
304: test fr.the_flags,F0_SRC_PRESENT;Is a source needed?
305: jz phase_proc_10 ; No, just use destination parameters
306: mov ax,fr.SrcxOrg ; Yes, get source origin X
307: mov fr.first_fetch,FF_TWO_INIT_FETCHES
308: ; Assume two initial fetches (if no
309: ; source, then it will be set = 1
310: ; later)
311: phase_proc_10:
312: mov si,ax
313: and ax,00000111b ;Get offset of source within byte
314: cmp si,di ;Which direction will we be moving?
315: jl phase_proc_30 ;Move from right to left
316:
317: ; The starting X of the source rectangle is >= the starting X of
318: ; the destination rectangle, therefore we will be moving bytes
319: ; starting from the left and stepping right.
320: ;
321: ; Alternatively, this is the path taken if there is no source
322: ; bitmap for the current BLT.
323: ;
324: ; Rectangle cases: 3,4,5,6,8
325:
326: sub al,bl ;Compute horiz. phase (source-dest)
327: mov fr.step_direction,STEPRIGHT ;Set direction of move
328: movzx ebx,bx
329: mov ah,[ebx].bitmask_tbl1 ;Get starting byte mask
330: ja phase_proc_two_fetches ;Scan line case 2, everything is
331: ; already set for this case.
332: ; (The JA above still tests the flags from "sub al,bl"; the MOV/MOVZX between leave them alone.)
333: ; Scan line cases 1 and 3:
334: ;
335: ; The correct first byte fetch needs to be set for the beginning
336: ; of the outer loop, and the phase must be made into a positive
337: ; number.
338: ;
339: ; This is the path that will be followed if there is no source bitmap
340: ; for the current BLT.
341:
342: mov fr.first_fetch,FF_ONE_INIT_FETCH;Set one initial fetch
343: jmp short pp_only_one_init_fetch
344:
345: ;-----------------------------------------------------------------------;
346: ; If we get all the bits we need in the first fetch then a second
347: ; (unnecessary) fetch could cause a GP Fault. So let's examine this:
348: ; The number of bits from (SI mod 8) to the end of the byte is the number
349: ; of available bits we get on the first fetch. This is (8 - (SI mod 8)).
350: ; If this is greater than or equal to xExt then we have all the bits we
351: ; need and we better not do the second fetch (even though the phase
352: ; relationship may suggest we need it).
353: ;
354: ; Conclusion: If (8 - (SI mod 8)) >= xExt then DO NOT make second fetch.
355: ;-----------------------------------------------------------------------;
356:
357: phase_proc_two_fetches:
358: mov cx,8
359: sub cl,bl ;Less dest start MOD 8
360: sub cl,al ;Less phase (source MOD 8 - dest MOD 8)
361:
362: ; We can save a couple cycles here since xExt - 1 is already in DX.
363: ; The condition CX >= xExt is the same as CX > DX.
364:
365: cmp cx,dx ;CX = 8 - (SI mod 8), DX = (xExt - 1)
366: jle pp_second_fetch_really_needed
367:
368:
369: ;-----------------------------------------------------------------------;
370: ; We are here BECAUSE the xExt is so small that we can get all the bits
371: ; on the scanline with a single lodsb (no byte boundary is crossed) AND
372: ; the phase relationship indicates that a second initial fetch is needed.
373: ;
374: ; We will override it and only do one fetch. However, if we simply
375: ; fail to do the second fetch then the phase code will screw us.
376: ; It will be expecting the bits to get fetched in the first fetch, saved
377: ; after the rotate, and mixed in in the second fetch's phase code.
378: ; So after the first fetch the bits have been saved in BH, and ANDed out
379: ; of the src data in AL.
380: ;
381: ; The solution is to set a flag here that tells the phase generation code
382: ; not to generate the usual masking part of the phase code.
383: ;
384: ; Short Bitblt Cases: (8 bits or less)
385: ;
386: ; 1) neither crosses byte boundary.
387: ;
388: ; a) phase requires second initial fetch
389: ;
390: ; Kill the phase masking. It will screw us. There will
391: ; be just one lodsb and one stosb and the first byte mask
392: ; will protect the dest bits that should not get hit.
393: ;
394: ; b) phase requires only one initial fetch
395: ;
396: ; Phase masking is irrelevant. Removing it would
397: ; be an optimization.
398: ;
399: ; 2) dest crosses byte boundary, but src does not
400: ;
401: ; a) phase requires second initial fetch
402: ;
403: ; impossible situation: the way we determine that a 2nd fetch
404: ; is necessary is if the first fetch does not get enough needed
405: ; bits to satisfy the first dest byte. Here the first fetch
406: ; gets ALL the bits and the first dest byte needs less than
407: ; ALL because it crosses a byte boundary.
408: ;
409: ; b) phase requires only one initial fetch
410: ;
411: ; Intervention would be bad. None is necessary since the 2nd
412: ; initial fetch will not be done. If we do intervene we will
413: ; cause trouble: Killing the masking will prevent the
414: ; "saved bits" from being saved. The first byte masking
415: ; can kill off these bits in AL and they will never
416: ; make it to the second stosb.
417: ;
418: ; 3) src crosses byte boundary (dest may or may not)
419: ; (this is known to be untrue at this point)
420: ;
421: ; There are bits we need in the second fetch, so a second
422: ; initial fetch can not cause a GP fault. Therefore do
423: ; everything the same as we would have before.
424: ;
425: ;
426: ; Conclusion: Intervention to kill the phase masking is
427: ; necessary iff
428: ; [src does not cross byte boundary] AND
429: ; dest does not cross byte boundary AND
430: ; [phase requires second initial fetch].
431: ; and bad if
432: ; dest crosses byte boundary, but [src does not]
433: ;
434: ; Statements in [] are known to be true at this point.
435: ;
436: ; Solution:
437: ;
438: ; If we always kill the phase-masking when neither crosses a byte
439: ; boundary and never kill it otherwise then everyone will be happy
440: ; (regardless of other conditions like whether phase requests a 2nd
441: ; initial fetch).
442: ;-----------------------------------------------------------------------;
443:
444: mov fr.first_fetch,FF_ONLY_1_SRC_BYTE
445: .errnz FF_ONE_INIT_FETCH ;Assembly-time check: FF_ONE_INIT_FETCH must be 0
446:
447: pp_second_fetch_really_needed:
448: pp_only_one_init_fetch:
449: mov ch,ah ;CH = first byte mask (from bitmask_tbl1)
450:
451: ;-----------------------------------------------------------------------;
452: ; We now have the correct phase and the correct first character fetch
453: ; routine set. Save the phase and ...
454: ;
455: ; currently: AL = phase
456: ; BL = dest start mod 8
457: ; CH = first byte mask
458: ; DX = inclusive X bit count
459: ; SI = source X start (if there is a source)
460: ; DI = destination X start
461: ;-----------------------------------------------------------------------;
462:
463: phase_proc_20:
464: add al,8 ;Phase must be positive
465: and al,00000111b
466:
467: ; To calculate the last byte mask, the inclusive count can be
468: ; added to the start X MOD 8 value, and the result taken MOD 8.
469: ; This is attractive since this is what is needed later for
470: ; calculating the inclusive byte count, so save the result
471: ; of the addition for later.
472:
473: add bx,dx ;Add inclusive extent to dest MOD 8
474: mov dx,bx ;Save for innerloop count !!!
475: and ebx,00000111b ;Set up bx for a base reg
476: mov cl,[ebx].bitmask_tbl2 ;Get last byte mask
477:
478: ;-----------------------------------------------------------------------;
479: ; To avoid GP faults, we must never do an extra fetch we don't need.
480: ; When we're ready for the last fetch there may already be enough bits
481: ; saved from the previous fetch (which we plan to combine with the bits
482: ; in the fetch we are about to do). If so then we'd better not do this
483: ; last fetch (it could cause a GP fault).
484: ;
485: ; The number of bits we have left from the previous byte is (8 - AL)
486: ; AL is the phase. (1 + BL) is the number of bits we actually need
487: ; to write to the final destination byte.
488: ;
489: ; So if (8 - AL) >= (1 + BL) then DO NOT do the last fetch. This
490: ; simplifies: if (BL + AL) <= 7 then DO NOT do the last fetch.
491: ;-----------------------------------------------------------------------;
492:
493: add bl,al
494: cmp bl,7
495: jg phase_proc_last_fetch_needed
496: or fr.first_fetch,FF_NO_LAST_FETCH
497: phase_proc_last_fetch_needed:
498:
499: mov bl,al ;Compute offset into phase mask table
500: movzx ebx,bx
501: mov bx,[ebx*2].phase_tbl1 ;Get the phase mask
502:
503: ; Currently:
504: ; AL = phase
505: ; BX = phase mask
506: ; CL = last byte mask
507: ; CH = first byte mask
508: ; DX = inclusive bit count + dest start MOD 8
509: ; SI = source X start (if there is a source)
510: ; DI = destination starting X
511:
512: jmp phase_proc_50 ;Finish here
513:
514: ; The starting X of the source rectangle is < the X of the destination
515: ; rectangle, therefore we will be moving bytes starting from the right
516: ; and stepping left.
517: ;
518: ; This code should never be reached if there is no source bitmap
519: ; for the current BLT.
520: ;
521: ; Rectangle cases: 1,2,7
522:
523: phase_proc_30:
524: mov fr.step_direction,ah ;Set direction of move
525: errnz STEPLEFT
526: movzx ebx,bx
527: mov cl,[ebx].bitmask_tbl1 ;Get last byte mask
528: push bx ;Save dest start MOD 8 for the last-fetch test below
529: add ax,dx ;Find end of the source
530:
531: ; To calculate the first byte mask, the inclusive count is
532: ; added to the start MOD 8 value, and the result taken MOD 8.
533: ; This is attractive since this is what is needed later for
534: ; calculating the inclusive byte count, so save the result
535: ; of the addition for later.
536:
537: add bx,dx ;Find end of the destination
538: add di,dx ;Will need to update dest start address
539: add si,dx ; and source's too
540: mov dx,bx ;Save inclusive bit count + start MOD 8
541: and ax,00000111b ;Get source offset within byte
542: and ebx,00000111b ;Get dest offset within byte
543: mov ch,[ebx].bitmask_tbl2 ;Get start byte mask
544: cmp al,bl ;Compute horiz. phase (source-dest)
545: jb pp_double_fetch ;Scan line case 5, everything is
546: ; already set for this case.
547:
548: ; Scan line cases 4 and 6:
549: ;
550: ; The correct first byte fetch needs to be set for the beginning
551: ; of the outer loop
552:
553: mov fr.first_fetch,FF_ONE_INIT_FETCH;Set initial fetch routine
554: jmp short pp_one_initial_fetch
555:
556: ;-----------------------------------------------------------------------;
557: ; If only-one-fetch is already set, then the following is a NOP.
558: ; It doesn't seem worth the effort to check and jmp around.
559: ;
560: ; If we get all the bits we need in the first fetch then a second
561: ; (unnecessary) fetch could cause a GP Fault. So let's examine this:
562: ;
563: ; (DX + SI) points to the first pel (remember we're stepping left).
564: ; So the number of needed bits we get in the first fetch is
565: ; ((DX + SI + 1) mod 8). This is currently equal to AX.
566: ; If AX >= xExt then we'd better not do two init fetches.
567: ;-----------------------------------------------------------------------;
568:
569: pp_double_fetch:
570: dec fr.xExt ;Temporarily make xExt inclusive for the compare
571: cmp ax,fr.xExt
572: jl pp_double_fetch_really_needed
573: mov fr.first_fetch,FF_ONLY_1_SRC_BYTE
574: .errnz FF_ONE_INIT_FETCH ;Assembly-time check: FF_ONE_INIT_FETCH must be 0
575: pp_double_fetch_really_needed:
576: inc fr.xExt ;Restore xExt
577:
578: pp_one_initial_fetch:
579: sub al,bl ;Compute horiz. phase (source-dest)
580: add al,8 ;Ensure phase positive
581: and al,00000111b
582:
583: ;-----------------------------------------------------------------------;
584: ; To avoid GP faults must never do an extra fetch we don't need.
585: ; The last byte fetch is unnecessary if Phase is greater than or equal
586: ; to 8 - BL. Phase is the number of bits we still have from the previous
587: ; fetch. 8 - BL is the number of bits we actually need to write to the
588: ; final destination byte. So if AL - (8 - BL) >= 0 skip the last fetch.
589: ;-----------------------------------------------------------------------;
590:
591: pop bx ;BL = dest start MOD 8 (pushed at phase_proc_30)
592: add bl,al
593: sub bl,8
594: jl pp_need_last_fetch
595: or fr.first_fetch,FF_NO_LAST_FETCH
596: pp_need_last_fetch:
597: phase_proc_40:
598:
599: ;-----------------------------------------------------------------------;
600: ; We now have the correct phase and the correct first character fetch
601: ; routine set. Generate the phase mask and save it.
602: ;
603: ; currently: AL = phase
604: ; CH = first byte mask
605: ; CL = last byte mask
606: ; DX = inclusive bit count + start MOD 8
607:
608: mov ah,cl ;Save last mask
609: mov cl,al ;Create the phase mask
610: mov bx,00FFh ; by shifting this
611: shl bx,cl ; according to the phase
612: mov cl,ah ;Restore last mask
613: ; jmp phase_proc_50 ;Go compute # of bytes to BLT
614: errn$ phase_proc_50
615:
616:
617: ; The different processing for the different X directions has been
618: ; completed, and the processing which is the same regardless of
619: ; the X direction is about to begin.
620: ;
621: ; The phase mask, the first/last byte masks, the X byte offsets,
622: ; and the number of innerloop bytes must be calculated.
623: ;
624: ;
625: ; Nasty stuff coming up here! We now have to determine how
626: ; many bits will be BLTed and how they are aligned within the bytes.
627: ; This is how it's done (or how I'm going to do it):
628: ;
629: ; The number of bits (inclusive number that is) is added to the
630: ; start MOD 8 value ( the left side of the rectangle, minimum X
631: ; value), then the result is divided by 8. Then:
632: ;
633: ;
634: ; 1) If the result is 0, then only one destination byte is being
635: ; BLTed. In this case, the start & ending masks will be ANDed
636: ; together, the innerloop count (# of full bytes to BLT) will
637: ; be zeroed, and the fr.last_mask set to all 0's (don't alter any
638: ; bits in last byte which will be the byte following the first
639: ; (and only) byte).
640: ;
641: ; | x x x x x| |
642: ; |_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|
643: ; 0 1 2 3 4 5 6 7
644: ;
645: ; start MOD 8 = 3, extent-1 = 4
646: ; 3+4 DIV 8 = 0, only altering one byte
647: ;
648: ;
649: ;
650: ; 2) If the result is 1, then only two bytes will be BLTed.
651: ; In this case, the start and ending masks are valid, and
652: ; all that needs to be done is set the innerloop count to 0.
653: ; (it is true that the last byte could have all bits affected
654: ; the same as if the innerloop count was set to 1 and the
655: ; last byte mask was set to 0, but I don't think there would be
656: ; much time saved special casing this).
657: ;
658: ; | x x x x x x x|x x x x x x x|
659: ; |_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|
660: ; 0 1 2 3 4 5 6 7
661: ;
662: ; start MOD 8 = 1, extent-1 = 14
663: ; 1+14 DIV 8 = 1. There is a first and last
664: ; byte but no innerloop count
665: ;
666: ;
667: ;
668: ; 3) If the result is >1, then there is some number of entire
669: ; bytes to be BLted by the innerloop. In this case the
670: ; number of innerloop bytes will be the result - 1.
671: ;
672: ; | x|x x x x x x x x|x
673: ; |_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|
674: ; 0 1 2 3 4 5 6 7
675: ;
676: ; start MOD 8 = 7, extent-1 = 9
677: ; 7+9 DIV 8 = 2. There is a first and last
678: ; byte and an innerloop count of 1 (result - 1)
679: ;
680: ; Currently: AL = horizontal phase
681: ; BX = horizontal phase mask
682: ; CH = first byte mask
683: ; CL = last byte mask
684: ; DX = left side X MOD 8 + inclusive X count
685: ; SI = source start X
686: ; DI = dest start X
687:
688: phase_proc_50:
689: mov fr.phase_h,al ;Save horizontal phase
690: mov fr.mask_p,bx ;Save phase mask
691: shr dx,3 ;/8 to get full byte count
692: jnz phase_proc_60 ;Result is >0, check it out
693:
694: ; There will only be one byte affected. Therefore the two byte masks
695: ; must be combined, the last byte mask cleared, and the innerloop
696: ; count set to zero.
697:
698: or fr.first_fetch,FF_ONLY_1_DEST_BYTE
699: and ch,cl ;Combine the two masks
700: xor cl,cl ;Clear out the last byte mask
701: inc dx ;Now just fall through to set
702: errn$ phase_proc_60 ; the innerloop count to 0!
703:
704: phase_proc_60:
705: dec dx ;Dec count (might become 0 just like
706: movzx edx,dx ; we want), and save it
707: mov fr.inner_loop_count,edx
708: mov bl,ch ;BL = first byte mask
709: mov ch,cl ;Compute last byte mask
710: not cl ; and save it
711: mov fr.last_mask,cx ;Low byte = inverted mask, high byte = mask
712: mov bh,bl ;Compute start byte mask
713: not bl ; and save it
714: mov fr.start_mask,bx ;Low byte = inverted mask, high byte = mask
715:
716: ; There may or may not be a source bitmap for the following address
717: ; computation. If there is no source, then the vertical setup code
718: ; will be entered with both the source and destination Y's set to the
719: ; destination Y and the address calculation skipped. If there is a
720: ; source, then the address calculation will be performed and the
721: ; vertical setup code entered with both the source and destination Y's.
722:
723: phase_processing_y:
724: shr di,3 ;Compute byte offset of destination
725: movzx edi,di ; and add to current destination
726: add fr.dest.lp_bits,edi ; offset
727:
728: mov dx,fr.DestyOrg ;Get destination Y origin
729: mov ax,dx ;Assume no source
730: mov cl,fr.the_flags
731: test cl,F0_SRC_PRESENT ;Is a source needed?
732: jz phase_proc_70 ; No, skip source set-up
733: shr si,3 ;Compute byte offset of source
734: movzx esi,si ; and add to current source offset
735: add fr.src.lp_bits,esi
736: mov ax,fr.SrcyOrg ;Get source Y origin
737:
738:
739: subttl Phase Processing (Y)
740: page
741:
742: ; The horizontal parameters have been calculated. Now the vertical
743: ; parameters must be calculated.
744: ;
745: ; Currently:
746: ; DX = destination Y origin
747: ; AX = source Y origin (destination origin if no source)
748: ; CL = fr.the_flags
749:
750: phase_proc_70:
751: mov bx,fr.yExt ;Get the Y extent of the BLT
752: dec bx ;Make it inclusive
753:
754:
755: ; The BLT will be Y+ if the top of the source is below or equal
756: ; to the top of the destination (cases: 1,4,5,7,8). The BLT
757: ; will be Y- if the top of the source is above the top of the
758: ; destination (cases: 2,3,6)
759: ;
760: ;
761: ; !...................!
762: ; !D !
763: ; ____! ..x !
764: ; |S ! : ! Start at top of S walking down
765: ; | ! !
766: ; | !...................!
767: ; | :
768: ; |____________________:
769: ;
770: ;
771: ; __________________
772: ; |S |
773: ; | ..................... Start at bottom of S walking up
774: ; | !D !
775: ; | ! : !
776: ; |____! ..x !
777: ; ! !
778: ; !....................
779:
780:
781: mov ch,INCREASE ;Set Y direction for top to bottom
782: cmp ax,dx ;Which direction do we move?
783: jge phase_proc_80 ;Step down screen (cases: 1,4,5,7,8)
784:
785:
786: ; Direction will be from bottom of the screen up (Y-)
787: ;
788: ; This code will not be executed if there is no source since
789: ; both Y's were set to the destination Y.
790:
791:
792: add dx,bx ;Find bottom scan line index for
793: add ax,bx ; destination and source
794: mov ch,DECREASE ;Set pattern increment
795:
796: phase_proc_80:
797: mov fr.pat_row,dl ;Set pattern row and increment
798: mov fr.direction,ch
799: sar ch,1 ;Map FF==>FF, 01==>00
800: errnz DECREASE-0FFFFFFFFh
801: errnz INCREASE-00001h
802:
803:
804: ; The Y direction has been computed. Compute the rest of the
805: ; Y parameters. These include the actual starting address,
806: ; the scan line and plane increment values, and whether or not
807: ; the extents will cross a 64K boundary.
808: ;
809: ; Currently:
810: ; DX = Y of starting destination scan
811: ; AX = Y of starting source scan
812: ; CH = BLT direction
813: ; 00 = increasing BLT, Y+
814: ; FF = decreasing BLT, Y-
815: ; CL = fr.the_flags
816: ; BX = inclusive Y extent
817:
818:
819: phase_proc_90:
820: test cl,F0_SRC_PRESENT ;Is a source needed?
821: movsx ecx,ch ; (Want ECX = +/- 1)
822: jz phase_proc_100 ; No, skip source set-up
823: test fr.ppcBlt.fb,PPC_NEEDED ;Packed pel conversion needed?
824: jz phase_proc_95 ; No, use normal setup
825: push ebp ; Yes, perform packed pel work
826: lea ebp,fr
827: cCall packed_pel_comp_y
828: pop ebp
829: jmp short phase_proc_100
830:
831: phase_proc_95:
832: push dx ;Save destination Y
833: push ebp ;Mustn't trash frame pointer
834: lea ebp,fr.src ;--> source data structure
835: cCall compute_y ;Process as needed
836: pop ebp
837: pop dx ;Restore destination Y
838:
839: phase_proc_100:
840: push ebp ;Mustn't trash frame pointer
841: mov ax,dx ;Put destination Y in ax
842: lea ebp,fr.dest ;--> destination data structure
843: cCall compute_y
844: pop ebp ;Restore frame pointer
845:
846: push ebp
847: lea ebp,fr
848: cCall check_device_special_cases
849: pop ebp
850: jc bitblt_exit ;C ==> BLT done w/special case
851:
852:
853: subttl Memory allocation for BLT compilation
854: page
855: cblt_allocate:
856: sub esp,MAX_BLT_SIZE ;Reserve stack space for the compiled BLT
857: mov edi,esp ;EDI --> start of the reserved space
858: mov fr.blt_addr,edi ;Save the address for later
859: push ebp
860: lea ebp,fr
861: cCall cblt ;compile the BLT onto the stack
862: pop ebp
863:
864: ; The BLT has been created on the stack. Set up the initial registers,
865: ; set the direction flag as needed, and execute the BLT.
866:
867: mov esi,fr.src.lp_bits ;--> source device's first byte
868: mov edi,fr.dest.lp_bits ;--> destination device's first byte
869: mov cx,fr.yExt ;Get count of lines to BLT
870: cld ;Assume this is the direction
871: cmp fr.step_direction,STEPRIGHT ;Stepping to the right?
872: jz call_blt_do_it ; Yes
873: std
874: call_blt_do_it:
875: push ebp ;MUST SAVE THIS
876: call fr.blt_addr ;Call the compiled BLT through the saved pointer
877: pop ebp
878: add esp,MAX_BLT_SIZE ;Return BLT space
879: errn$ bitblt_exit
880:
881: bitblt_exit:
882:
883: ; Here we test if the VGA was involved and skip reseting the VGA state if
884: ; it was not involved.
885:
886: test fr.the_flags,F0_DEST_IS_DEV + F0_SRC_IS_DEV
887: jz ega_not_involved
888:
889: ; Restore EGA registers to the default state.
890:
891: mov dx,EGA_BASE + SEQ_DATA
892: mov al,MM_ALL
893: out dx,al
894: mov dl,GRAF_ADDR
895: mov ax,0FF00h + GRAF_BIT_MASK
896: out dx,ax
897: mov ax,DR_SET shl 8 + GRAF_DATA_ROT
898: out dx,ax
899: mov ax,GRAF_ENAB_SR
900: out dx,ax
901: ega_not_involved:
902:
903: mov eax,1 ;Return 1 in EAX (good exit)
904: cld ;Leave direction cleared
905: cRet vCompiledBlt
906:
907: xxxvCompiledBlt endp
908:
909:
910:
911: ;----------------------------Private-Routine----------------------------;
912: ; copy_dev
913: ;
914: ; Copy device information to frame, and shift this device's "is device" and "is color" bits into BH.
915: ;
916: ; Entry:
917: ; ESI = pdsurf
918: ; EDI --> frame DEV structure
919: ; EBP --> PPC structure (for source only; written only for formats other than BMF_1BPP, BMF_DEVICE, BMF_PHYSDEVICE)
920: ; BH = fr.the_flags, accumulated so far
921: ; Returns:
922: ; BH = previous BH shifted left two bits; bit 1 set if format is BMF_PHYSDEVICE, bit 0 set if format is not BMF_1BPP
923: ; Carry clear if no error (the final AND always leaves carry clear)
924: ; EBP --> PPC structure (unchanged); EAX destroyed
925: ; Error Returns:
926: ; None
927: ; Calls:
928: ; None
929: ; History:
930: ; Sun 22-Feb-1987 16:29:09 -by- Walt Moore [waltm]
931: ; Created.
932: ;-----------------------------------------------------------------------;
933:
934: cProc copy_dev
935:
936: mov eax,[esi].dsurf_sizlSurf.sizl_cx
937: mov [edi].width_bits,ax ;Surface width (low word only)
938: mov eax,[esi].dsurf_sizlSurf.sizl_cy
939: mov [edi].height,ax ;Surface height (low word only)
940: mov eax,[esi].dsurf_lNextScan
941: mov [edi].width_b,ax ;Scan-to-scan delta (low word only)
942: mov eax,[esi].dsurf_pvBitmapStart
943: mov [edi].lp_bits,eax ;Pointer to the start of the bitmap
944: mov eax,[esi].dsurf_lNextPlane
945: mov [edi].next_plane,eax ;Plane-to-plane delta
946: mov al,[esi].dsurf_iFormat ;AL = surface format for the tests below
947: shl bh,1 ;Make room for the "is device" bit
948: cmp al,BMF_PHYSDEVICE
949: sete ah ;AH = 1 if this is the physical device, else 0
950: or bh,ah
951: errnz F0_SRC_IS_DEV-00001000b
952: errnz F0_DEST_IS_DEV-0000010b
953:
954: shl bh,1 ;Make room for the "is color" bit
955: cmp al,BMF_1BPP
956: je copy_dev_20 ;Mono, color bit is clear
957: or bh,F0_DEST_IS_COLOR ;Not 1BPP, so flag as color
958: errnz F0_SRC_IS_COLOR-00000100b
959: errnz F0_DEST_IS_COLOR-00000001b
960:
961: ; The source may be a packed pel source, which will have to be converted
962:
963: cmp al,BMF_DEVICE ;4 plane format
964: je copy_dev_20 ; Yes, no packed pel conversion
965: cmp al,BMF_PHYSDEVICE
966: je copy_dev_20 ; ditto
967:
968: mov [ebp].iFormat,al ;Save source format
969: mov [ebp].fb,PPC_NEEDED ;Show conversion needed
970: copy_dev_20:
971:
972: mov al,bh ;Set IS_COLOR and IS_DEVICE
973: and al,IS_COLOR+IS_DEVICE ; flags in the Device Flags
974: errnz IS_COLOR-F0_DEST_IS_COLOR ;Must be same bits
975: mov [edi].dev_flags,al
976: cRet copy_dev
977:
978: endProc copy_dev
979:
980:
981: ;----------------------------Private-Routine----------------------------;
982: ; pattern_preprocessing
983: ;
984: ; If a pattern is needed and the brush is not solid, build a rotated
985: ; copy of the brush on the frame and point fr.lpPBrush at that copy.
986: ; NOTE(review): the description below is not implemented by the code here.
987: ; The type of brush to use will be set, and the brush pointer
988: ; updated to point to the mono bits if the mono brush will be
989: ; used. The type of brush used will match the destination device.
990: ;
991: ; If the destination is mono and the source is color, then a mono
992: ; brush fetch will be used, with the color brush munged in advance
993: ; according to the background/foreground colors passed:
994: ;
995: ; All brush pixels which match the background color should be set
996: ; to white (1). All other brush pixels should be set to black (0).
997: ;
998: ; If the physical color is stored as all 1's or 0's for each
999: ; plane, then by XORing the physical color for a plane with the
1000: ; corresponding byte in the brush, and ORing the results, this
1001: ; will give 0's where the color matched, and 1's where the colors
1002: ; didn't match. Inverting this result will then give 1's where
1003: ; the brush matched the background color and 0's where it did not.
1004: ;
1005: ; If both the source and destination are color, or the source is mono
1006: ; and the destination color, then the color portion of the brush will
1007: ; be used.
1008: ;
1009: ; If both the source and destination are mono, then the monochrome
1010: ; portion of the brush will be used.
1011: ;
1012: ; Entry:
1013: ; BH = fr.the_flags
1014: ; EBP = fr (fr.lpPBrush and fr.pptlBrush are read when a pattern is used)
1015: ; Returns:
1016: ; Carry flag clear (every path exits through the CLC at the end)
1017: ; Error Returns:
1018: ; None in this code (header once listed null lpPBrush, or hollow brush)
1019: ; Registers Preserved:
1020: ; EBP, EBX (EBX is pushed and popped around the rotate loop)
1021: ; Registers Destroyed:
1022: ; AL,ECX,EDX,ESI,EDI,flags
1023: ; Calls:
1024: ; None
1025: ; History:
1026: ; Sat 15-Aug-1987 18:20:34 -by- Wesley O. Rupel [wesleyr]
1027: ; Added 4-plane support.
1028: ; Sun 22-Feb-1987 16:29:09 -by- Walt Moore [waltm]
1029: ; Created.
1030: ;-----------------------------------------------------------------------;
1031:
1032: ;------------------------------Pseudo-Code------------------------------;
1033: ; { fr.the_flags = BH; if (no pattern || solid brush) return;
1034: ; copy 4 x 8 brush bytes into fr.a_brush, rotated; fr.lpPBrush = copy }
1035: ;-----------------------------------------------------------------------;
1036:
1037: cProc pattern_preprocessing
1038:
1039: mov [ebp].the_flags,bh ;Save flag values
1040: test bh,F0_PAT_PRESENT ;Pattern required?
1041: jz pattern_preproc_end ; No, skip pattern check
1042: mov esi,[ebp].lpPBrush ;--> physical brush
1043: mov dl,[esi].oem_brush_accel;Save brush accelerator
1044: mov [ebp].brush_accel,dl ; in local variable frame
1045: test dl,SOLID_BRUSH ;Don't need to rotate a solid brush
1046: jnz pattern_preproc_end
1047:
1048: ; !!! hack-o-ramma. rotate the brush on the frame for now.
1049:
1050: push ebx ;Keep caller's EBX (BH holds the flags)
1051: mov edx,00000111b ;EDX = 7, the mod-8 mask (DL accel already saved)
1052: lea edi,[ebp].a_brush.oem_brush_C0 ;EDI --> temp brush area
1053: mov [ebp].lpPBrush,edi ;Frame now points at the rotated copy
1054: mov ebx,[ebp].pptlBrush ;EBX --> two dwords, presumably brush origin x,y
1055: mov ecx,dword ptr [ebx][0]
1056: and ecx,edx ;CL = first dword mod 8 = bit rotate count
1057: mov ebx,dword ptr [ebx][4]
1058: and ebx,edx ;EBX = second dword mod 8 = first row index
1059: mov ch,4 ;CH = count of 8-byte groups to copy; CL is untouched
1060:
1061: pattern_preproc_color:
1062: lodsb ;AL = next source byte (assumes DF is clear - TODO confirm)
1063: and ebx,edx ;Wrap the row index back into 0..7
1064: ror al,cl ;Rotate the byte right by the CL count
1065: mov byte ptr [edi][ebx],al ;Store at the wrapped row of this group
1066: inc ebx ;Advance to the next row
1067:
1068: lodsb ;Byte 2 of 8: same load/wrap/rotate/store steps
1069: and ebx,edx
1070: ror al,cl
1071: mov byte ptr [edi][ebx],al
1072: inc ebx
1073:
1074: lodsb ;Byte 3 of 8
1075: and ebx,edx
1076: ror al,cl
1077: mov byte ptr [edi][ebx],al
1078: inc ebx
1079:
1080: lodsb ;Byte 4 of 8
1081: and ebx,edx
1082: ror al,cl
1083: mov byte ptr [edi][ebx],al
1084: inc ebx
1085:
1086: lodsb ;Byte 5 of 8
1087: and ebx,edx
1088: ror al,cl
1089: mov byte ptr [edi][ebx],al
1090: inc ebx
1091:
1092: lodsb ;Byte 6 of 8
1093: and ebx,edx
1094: ror al,cl
1095: mov byte ptr [edi][ebx],al
1096: inc ebx
1097:
1098: lodsb ;Byte 7 of 8
1099: and ebx,edx
1100: ror al,cl
1101: mov byte ptr [edi][ebx],al
1102: inc ebx
1103:
1104: lodsb ;Byte 8 of 8
1105: and ebx,edx
1106: ror al,cl
1107: mov byte ptr [edi][ebx],al
1108: inc ebx ;Wrapped again by the AND at the top of the next pass
1109:
1110: add edi,8 ;EDI --> next 8-byte group of the temp brush
1111: dec ch ;One group done
1112: jnz pattern_preproc_color ;Loop until all 4 groups are copied
1113: pop ebx ;Restore caller's EBX
1114:
1115: pattern_preproc_end:
1116: clc ;Always report success
1117: cRet pattern_preprocessing
1118:
1119: endProc pattern_preprocessing
1120:
1121:
1122: ;----------------------------Private-Routine----------------------------;
1123: ; compute_y
1124: ;
1125: ; Compute y-related parameters.
1126: ;
1127: ; The parameters related to the Y coordinate and BLT direction
1128: ; are computed. The parameters include:
1129: ;
1130: ; a) Index to next scan line (stored in next_scan)
1131: ; b) Starting Y address calculation (added into lp_bits)
1132: ; Only lp_bits and next_scan are written; next_plane is not touched.
1133: ;
1134: ; Entry:
1135: ; EBP --> DEV structure to use (src or dest)
1136: ; AX = Y coordinate (zero-extended to 32 bits before use)
1137: ; ECX = BLT direction, used as a full 32-bit mask
1138: ; 00000000 = Y+
1139: ; FFFFFFFF = Y-
1140: ; BX = inclusive Y extent (not referenced by this routine)
1141: ; Returns:
1142: ; ECX = BLT direction (unchanged)
1143: ; EBX = inclusive count (unchanged)
1144: ; Registers Preserved:
1145: ; EBP
1146: ; Registers Destroyed:
1147: ; EAX,EDX,ESI,flags
1148: ; Calls:
1149: ; None
1150: ; History:
1151: ;-----------------------------------------------------------------------;
1152:
1153: cProc compute_y
1154:
1155: movsx esi,[ebp].width_b ;Need bmWidthBytes a couple of times
1156: movzx eax,ax ;Y is taken as an unsigned 16-bit value
1157: mul esi ;Compute Y address
1158: add [ebp].lp_bits,eax ;lp_bits += low dword of Y * width_b (EDX is discarded)
1159: xor esi,ecx ;1's complement if Y-
1160: sub esi,ecx ;2's complement if Y-
1161:
1162: test [ebp].dev_flags,IS_DEVICE
1163: jnz compute_y_done ;Device: next scan is +/- width_b
1164: test [ebp].dev_flags,IS_COLOR
1165: jz compute_y_done ;Mono bitmap: next scan is +/- width_b
1166:
1167:
1168: ; !!! I need to rewrite how next scan is handled. Currently, for +Y, next scan is 0,
1169: ; !!! and for -Y it is 2* -lNextScan
1170:
1171: add esi,esi ;Assume -Y (comp 2 * -lNextScan)
1172: and esi,ecx ;ESI = 0 if +Y, or 2 * -lNextScan
1173:
1174: compute_y_done:
1175: mov [ebp].next_scan,esi ;Set index to next scan line
1176: cRet compute_y ;All done with device, small bitmaps
1177:
1178: endProc compute_y
1179:
1180: _TEXT$02 ends
1181:
1182: end
1183:
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.