|
|
1.1 root 1: ;---------------------------Module-Header------------------------------;
2: ; Module Name: fasttext.asm
3: ;
4: ; Copyright (c) 1992 Microsoft Corporation
5: ;-----------------------------------------------------------------------;
6: ;-----------------------------------------------------------------------;
7: ; BOOL vFastText(PDEV * ppdev, GLYPHPOS * pGlyphPos, ULONG ulGlyphCount,
8: ; PBYTE pTempBuffer, ULONG ulBufDelta, ULONG ulCharInc,
9: ; RECTL * prclText, RECTL * prclOpaque, INT iFgColor,
10: ; INT iBgColor, ULONG fDrawFlags);
11: ; ppdev -
12: ; pGlyphPos -
13: ; ulGlyphCount - # of glyphs to draw. Must never be 0.
14: ; pTempBuffer -
15: ; ulBufDelta - logical width of temp buffer in bytes. This value *must* be the
16: ; same number of bytes spanned by prclText; it is assumed that
17: ; scans in the temp buffer are contiguous
18: ; ulCharInc -
19: ; prclText -
20: ; prclOpaque -
21: ; iFgColor -
22: ; iBgColor -
23: ; fDrawFlags -
24: ;
25: ; Performs accelerated proportional text drawing.
26: ;
27: ;-----------------------------------------------------------------------;
28: ;
29: ; Note: The general opaque text back-end currently assumes that it will
30: ; never receive a text string with a bounding box that does not span at
31: ; least one quadpixel (the four pixels at a VGA screen address).
32: ;
33: ;-----------------------------------------------------------------------;
34: ;
35: ; Note: The term "quadpixel" means a four-pixel set stored across all
36: ; four planes of VGA memory in planar high-color mode. Quadpixels map to
37: ; nibbles in the temp buffer in which text is assembled, where nibbles
38: ; are always bits 4-7 or 0-3.
39: ;
40: ;-----------------------------------------------------------------------;
41: ;
42: ; Note: The direction flag is *not* explicitly set or cleared.
43: ;
44: ;-----------------------------------------------------------------------;
45: ;
46: ; Note: Assumes the text rectangle has a positive height and width. Will
47: ; not work properly if this is not the case.
48: ;
49: ;-----------------------------------------------------------------------;
50: ;
51: ; Set LOOP_UNROLL_SHIFT to the log2 of the number of times you want to unroll
52: ; loops in this module that are implemented with the unrolling macros. For
53: ; example, LOOP_UNROLL_SHIFT of 3 yields 2**3 = 8 times unrolling. This is
54: ; the only thing you need to change to control unrolling.
55:
56: LOOP_UNROLL_SHIFT equ 2
57:
58: ;-----------------------------------------------------------------------;
59:
60: comment $
61:
62: The overall approach of this module is to draw the text into a system
63: memory buffer, then copy the buffer to the screen a word at a time
64: using write mode 2 and clever use of the VGA hardware so that no OUTs
65: and a minimum of display memory reads are required. The clever use is
66: setting the ALUs to XOR, the latches to the background color, and the
67: write mode to 2, so each nibble in bits 0-3 written by the CPU turns
68: into 0 or 0ffh for that plane. Then the Bit Mask is set to fg ^ bg,
69: so that common bits between the fg and bg are preserved, while non-
70: common bits are either preserved (=bg color) by a 0->000h bit for
71: that plane, or flipped (=fg color) by a 1->0ffh bit for that plane. The
72: Map Mask is used to clip edges; no read before write is required. Note
73: that bits 0-3 must be reversed to match the order of pixels in planes
74: 0-3. Note also that we write a whole word, containing two nibbles in
75: bits 0-3 of each byte, at once, to draw 8 pixels per write.
76:
77: comment $
78:
79: .386
80:
81: ifndef DOS_PLATFORM
82: .model small,c
83: else
84: ifdef STD_CALL
85: .model small,c
86: else
87: .model small,pascal
88: endif; STD_CALL
89: endif; DOS_PLATFORM
90:
91: assume cs:FLAT,ds:FLAT,es:FLAT,ss:FLAT
92: assume fs:nothing,gs:nothing
93:
94: .xlist
95: include stdcall.inc ;calling convention cmacros
96: include i386\strucs.inc
97: include i386\driver.inc
98: include i386\egavga.inc
99: include i386\unroll.inc
100:
101: .list
102:
103: ;-----------------------------------------------------------------------;
104:
105: .data
106:
107: ;-----------------------------------------------------------------------;
108: ; Tables used to branch into glyph-drawing optimizations.
109: ;
110: ; Handles narrow (1-4 bytes wide) glyph drawing, for case where initial byte
111: ; should be MOVed even if it's not aligned (intended for use in drawing the
112: ; first glyph in a string). Table index format (20 entries, widths 0-4) is:
113: ; Bits 4-2: dest width in bytes (0-4; width 4 needs bit 4)
114: ; Bit 1 : 1 if don't need last source byte, 0 if do need last source byte
115: ; Bit 0 : 1 if no rotation (aligned), 0 if rotation (non-aligned)
116: align 4
117: MovInitialTableNarrow label dword ;index = width*4 + bit1*2 + bit0, built by CMP/RCL pairs in the setup code
118: dd draw_prop_done ;0 wide
119: dd draw_prop_done ;0 wide
120: dd draw_prop_done ;0 wide
121: dd draw_prop_done ;0 wide
122: dd mov_first_1_wide_rotated_need_last ;nonalign, 1 wide, need last
123: dd mov_first_1_wide_unrotated ;aligned, 1 wide
124: dd mov_first_1_wide_rotated_no_last ;nonalign, 1 wide, no last
125: dd mov_first_1_wide_unrotated ;aligned, 1 wide
126: dd mov_first_2_wide_rotated_need_last ;nonalign, 2 wide, need last
127: dd mov_first_2_wide_unrotated ;aligned, 2 wide
128: dd mov_first_2_wide_rotated_no_last ;nonalign, 2 wide, no last
129: dd mov_first_2_wide_unrotated ;aligned, 2 wide
130: dd mov_first_3_wide_rotated_need_last ;nonalign, 3 wide, need last
131: dd mov_first_3_wide_unrotated ;aligned, 3 wide
132: dd mov_first_3_wide_rotated_no_last ;nonalign, 3 wide, no last
133: dd mov_first_3_wide_unrotated ;aligned, 3 wide
134: dd mov_first_4_wide_rotated_need_last ;nonalign, 4 wide, need last
135: dd mov_first_4_wide_unrotated ;aligned, 4 wide
136: dd mov_first_4_wide_rotated_no_last ;nonalign, 4 wide, no last
137: dd mov_first_4_wide_unrotated ;aligned, 4 wide
138:
139: ; Handles narrow (1-4 bytes wide) glyph drawing, for case where initial byte
140: ; should be ORed if it's not aligned (intended for use in drawing all but the
141: ; first glyph in a string). Table index format (20 entries, widths 0-4) is:
142: ; Bits 4-2: dest width in bytes (0-4; width 4 needs bit 4)
143: ; Bit 1 : 1 if don't need last source byte, 0 if do need last source byte
144: ; Bit 0 : 1 if no rotation (aligned), 0 if rotation (non-aligned)
145: align 4
146: OrInitialTableNarrow label dword ;aligned entries reuse the mov_first_* unrotated routines
147: dd draw_prop_done ;0 wide
148: dd draw_prop_done ;0 wide
149: dd draw_prop_done ;0 wide
150: dd draw_prop_done ;0 wide
151: dd or_first_1_wide_rotated_need_last ;nonalign, 1 wide, need last
152: dd mov_first_1_wide_unrotated ;aligned, 1 wide
153: dd or_first_1_wide_rotated_no_last ;nonalign, 1 wide, no last
154: dd mov_first_1_wide_unrotated ;aligned, 1 wide
155: dd or_first_2_wide_rotated_need_last ;nonalign, 2 wide, need last
156: dd mov_first_2_wide_unrotated ;aligned, 2 wide
157: dd or_first_2_wide_rotated_no_last ;nonalign, 2 wide, no last
158: dd mov_first_2_wide_unrotated ;aligned, 2 wide
159: dd or_first_3_wide_rotated_need_last ;nonalign, 3 wide, need last
160: dd mov_first_3_wide_unrotated ;aligned, 3 wide
161: dd or_first_3_wide_rotated_no_last ;nonalign, 3 wide, no last
162: dd mov_first_3_wide_unrotated ;aligned, 3 wide
163: dd or_first_4_wide_rotated_need_last ;nonalign, 4 wide, need last
164: dd mov_first_4_wide_unrotated ;aligned, 4 wide
165: dd or_first_4_wide_rotated_no_last ;nonalign, 4 wide, no last
166: dd mov_first_4_wide_unrotated ;aligned, 4 wide
167:
168: ; Handles narrow (1-4 bytes wide) glyph drawing, for case where all bytes
169: ; should be ORed (intended for use in drawing potentially overlapping glyphs).
170: ; Table index format (20 entries, widths 0-4) is:
171: ; Bits 4-2: dest width in bytes (0-4; width 4 needs bit 4)
172: ; Bit 1 : 1 if don't need last source byte, 0 if do need last source byte
173: ; Bit 0 : 1 if no rotation (aligned), 0 if rotation (non-aligned)
174: align 4
175: OrAllTableNarrow label dword ;index = width*4 + bit1*2 + bit0, built by CMP/RCL pairs in the setup code
176: dd draw_prop_done ;0 wide
177: dd draw_prop_done ;0 wide
178: dd draw_prop_done ;0 wide
179: dd draw_prop_done ;0 wide
180: dd or_all_1_wide_rotated_need_last ;nonalign, 1 wide, need last
181: dd or_all_1_wide_unrotated ;aligned, 1 wide
182: dd or_all_1_wide_rotated_no_last ;nonalign, 1 wide, no last
183: dd or_all_1_wide_unrotated ;aligned, 1 wide
184: dd or_all_2_wide_rotated_need_last ;nonalign, 2 wide, need last
185: dd or_all_2_wide_unrotated ;aligned, 2 wide
186: dd or_all_2_wide_rotated_no_last ;nonalign, 2 wide, no last
187: dd or_all_2_wide_unrotated ;aligned, 2 wide
188: dd or_all_3_wide_rotated_need_last ;nonalign, 3 wide, need last
189: dd or_all_3_wide_unrotated ;aligned, 3 wide
190: dd or_all_3_wide_rotated_no_last ;nonalign, 3 wide, no last
191: dd or_all_3_wide_unrotated ;aligned, 3 wide
192: dd or_all_4_wide_rotated_need_last ;nonalign, 4 wide, need last
193: dd or_all_4_wide_unrotated ;aligned, 4 wide
194: dd or_all_4_wide_rotated_no_last ;nonalign, 4 wide, no last
195: dd or_all_4_wide_unrotated ;aligned, 4 wide
196:
197: ; Handles arbitrarily wide glyph drawing, for case where initial byte should be
198: ; MOVed even if it's not aligned (intended for use in drawing the first glyph
199: ; in a string). Used when the dest span exceeds 4 bytes. Table index format is:
200: ; Bit 1 : 1 if don't need last source byte, 0 if do need last source byte
201: ; Bit 0 : 1 if no rotation (aligned), 0 if rotation (non-aligned)
202: align 4
203: MovInitialTableWide label dword ;4 entries: index = bit1*2 + bit0
204: dd mov_first_N_wide_rotated_need_last ;nonalign, need last
205: dd mov_first_N_wide_unrotated ;aligned
206: dd mov_first_N_wide_rotated_no_last ;nonalign, no last
207: dd mov_first_N_wide_unrotated ;aligned
208:
209: ; Handles arbitrarily wide glyph drawing, for case where initial byte should be
210: ; ORed if it's not aligned (intended for use in drawing all but the first glyph
211: ; in a string). Used when the dest span exceeds 4 bytes. Table index format is:
212: ; Bit 1 : 1 if don't need last source byte, 0 if do need last source byte
213: ; Bit 0 : 1 if no rotation (aligned), 0 if rotation (non-aligned)
214: align 4
215: OrInitialTableWide label dword ;aligned entries reuse mov_first_N_wide_unrotated
216: dd or_first_N_wide_rotated_need_last ;nonalign, need last
217: dd mov_first_N_wide_unrotated ;aligned
218: dd or_first_N_wide_rotated_no_last ;nonalign, no last
219: dd mov_first_N_wide_unrotated ;aligned
220:
221: ; Handles arbitrarily wide glyph drawing, for case where all bytes should
222: ; be ORed (intended for use in drawing potentially overlapping glyphs).
223: ; Used when the dest span exceeds 4 bytes. Table index format is:
224: ; Bit 1 : 1 if don't need last source byte, 0 if do need last source byte
225: ; Bit 0 : 1 if no rotation (aligned), 0 if rotation (non-aligned)
226: align 4
227: OrAllTableWide label dword ;4 entries: index = bit1*2 + bit0
228: dd or_all_N_wide_rotated_need_last ;nonalign, need last
229: dd or_all_N_wide_unrotated ;aligned
230: dd or_all_N_wide_rotated_no_last ;nonalign, no last
231: dd or_all_N_wide_unrotated ;aligned
232:
233: ; Vectors to entry points for drawing various types of text. '*' means works as
234: ; is but could be accelerated with a custom scanning loop.
235: align 4 ;indexed by fDrawFlags: bit 2 = tops aligned, bit 1 = overlap, bit 0 = fixed pitch
236: MasterTextTypeTable label dword ;tops aligned overlap fixed pitch
237: dd draw_nf_ntb_o_to_temp_start ; N N N *
238: dd draw_f_ntb_o_to_temp_start ; N N Y *
239: dd draw_nf_ntb_o_to_temp_start ; N Y N
240: dd draw_f_ntb_o_to_temp_start ; N Y Y
241: dd draw_nf_tb_no_to_temp_start ; Y N N
242: dd draw_f_tb_no_to_temp_start ; Y N Y
243: dd draw_nf_ntb_o_to_temp_start ; Y Y N *
244: dd draw_f_ntb_o_to_temp_start ; Y Y Y *
245:
246: ;-----------------------------------------------------------------------
247: ; Tables of pointers to optimizations for drawing up to four pixels
248: ; of transparent text based on the upper or lower nibble of a byte.
249: align 4 ;each table has 16 entries, one per nibble value 0-F
250: xpar_high_nibble_table label dword ;presumably indexed by the upper nibble value (dispatch code not visible here) -- TODO confirm
251: dd xpar_high_nibble_0
252: dd xpar_high_nibble_1
253: dd xpar_high_nibble_2
254: dd xpar_high_nibble_3
255: dd xpar_high_nibble_4
256: dd xpar_high_nibble_5
257: dd xpar_high_nibble_6
258: dd xpar_high_nibble_7
259: dd xpar_high_nibble_8
260: dd xpar_high_nibble_9
261: dd xpar_high_nibble_A
262: dd xpar_high_nibble_B
263: dd xpar_high_nibble_C
264: dd xpar_high_nibble_D
265: dd xpar_high_nibble_E
266: dd xpar_high_nibble_F
267:
268: align 4
269: xpar_low_nibble_table label dword ;presumably indexed by the lower nibble value (dispatch code not visible here) -- TODO confirm
270: dd xpar_low_nibble_0
271: dd xpar_low_nibble_1
272: dd xpar_low_nibble_2
273: dd xpar_low_nibble_3
274: dd xpar_low_nibble_4
275: dd xpar_low_nibble_5
276: dd xpar_low_nibble_6
277: dd xpar_low_nibble_7
278: dd xpar_low_nibble_8
279: dd xpar_low_nibble_9
280: dd xpar_low_nibble_A
281: dd xpar_low_nibble_B
282: dd xpar_low_nibble_C
283: dd xpar_low_nibble_D
284: dd xpar_low_nibble_E
285: dd xpar_low_nibble_F
286:
287: ; Masks for clipping for the four possible left and right edge alignments
288: jOpaqueLeftMasks label byte ;4 byte entries, one per left-edge alignment
289: db 0ffh,00eh,00ch,008h ;presumably Map Mask values indexed by edge X mod 4 (user not visible here) -- TODO confirm
290:
291: jOpaqueRightMasks label byte ;4 byte entries, one per right-edge alignment
292: db 0ffh,001h,003h,007h ;presumably Map Mask values indexed by edge X mod 4 (user not visible here) -- TODO confirm
293:
294: ;-----------------------------------------------------------------------;
295:
296: .code
297:
298: ;-----------------------------------------------------------------------;
299:
300: cProc vFastText,44,<\
301: uses esi edi ebx,\
302: ppdev:ptr,\
303: pGlyphPos:ptr,\
304: ulGlyphCount:dword,\
305: pTempBuffer:ptr,\
306: ulBufDelta:dword,\
307: ulCharInc:dword,\
308: prclText:ptr,\
309: prclOpaque:ptr,\
310: iFgColor:dword,\
311: iBgColor:dword,\
312: fDrawFlags:dword>
313: ;the 44 above presumably = 11 dword-sized args * 4 bytes (see cProc in stdcall.inc) -- TODO confirm
314: local ulGlyDelta:dword ;width per scan of source glyph, in bytes
315: local ulWidthInBytes:dword ;# of dest (temp buffer) bytes spanned per glyph scan; stored on the wide (>4 byte) path
316: local ulTmpWidthInBytes:dword ;working byte-width count
317: local ulGlyphX:dword ;for fixed-pitch text, maintains the current
318: ; glyph's X relative to ulTempLeft (before gb_x is added); advanced by ulCharInc
319: local pGlyphLoop:dword ;pointer to glyph-processing loop
320: local ulTempLeft:dword ;X coordinate on screen of left edge of temp
321: ; buffer (prclText xLeft rounded down to a multiple of 8)
322: local pfnEntry:dword ;pointer to unrolled loop entry point
323: local ulXparBytes:dword ;general loop count storage
324: local ulTmpSrcDelta:dword ;distance from end of one buffer text scan to
325: ; start of next
326: local ulTmpDstDelta:dword ;distance from end of one screen text scan to
327: ; start of next
328: local ulTopScan:dword ;top scan of dest text rect in current bank
329: local ulBottomScan:dword ;bottom scan of dest text rect
330: local ulNumScans:dword ;# of scans to draw
331: local ulScreenDelta:dword ;scan-to-scan offset in screen
332: local ulScreenDeltaLinear:dword ;scan-to-scan offset in screen when in
333: ; nice, neat linear packed-pixel mode
334: local ulTextWidthInBytesMinus1:dword ;# of bytes across spanned by
335: ; text, minus 1
336: local pScreen:dword ;pointer to first screen byte to which to draw
337: local pfnEdgeVector:dword ;pointer to routine to draw any needed edges
338: local pfnFirstOpaqVector:dword ;pointer to initial drawing routine
339: ; called for opaque (either whole
340: ; bytes, or edge(s) if no whole bytes)
341: local ulWholeWidthInQuadpixelPairs:dword ;# of quadpixel pairs to copy
342: local ulWholeWidthInQuadpixelPairsMinus1:dword ;# of whole bytes to
343: ; copy - 1
344: local ulOddQuadpixel:dword ;1 if odd quadpixel in quadpixel-pair
345: ; copy
346: local ulTextLeft:dword ;left edge of leftmost glyph
347: local ulLeftMask:dword ;for opaque text, left edge mask for string
348: local ulRightMask:dword ;for opaque text, right edge mask for string
349: local ulUnrolledCount:dword ;# of unrolled loop reps
350: local ulUnrolledOddCount:dword ;# of unrolled loop odd reps
351: local ulYOrigin:dword ;Y origin of text in string, relative to ulTopScan
352: ; (all glyphs are at the same Y origin)
353: local pGlyphFlipTable:dword ;pointer to look-up table used to reverse
354: ; the order of bits 0-3 and 4-7
355: local ulLeftEdgeShift:dword ;amount by which to right-shift left-edge
356: ; nibbles during opaque expansion to
357: ; right-justify them (0 or 4)
358: local ulRightEdgeShift:dword ;amount by which to right-shift right-edge
359: ; nibbles during opaque expansion to
360: ; right-justify them (0 or 4)
361: local ulVGAWidthInBytesMinus1:dword ;# of VGA addresses from left edge
362: ; to right edge of destination
363:
364: ;-----------------------------------------------------------------------;
365: ; Set the pointer to the table used to flip glyph bits 0-3 and 4-7. This
366: ; table is guaranteed to be on a 256-byte boundary, so look-up can be
367: ; performed simply by loading the low byte of a pointer register.
368: ;-----------------------------------------------------------------------;
369:
370: mov esi,ppdev ;esi -> PDEV
371: mov ebx,prclText ;point to bounding text rect during 486
372: ; interlock slot
373: mov eax,[esi].pdev_pjGlyphFlipTable ;fetch the flip-table pointer from the PDEV
374: mov pGlyphFlipTable,eax ;keep a local copy
375:
376: ;-----------------------------------------------------------------------;
377: ; If 8 wide, byte aligned, and opaque, handle with very fast special-case
378: ; code. All four tests must pass; any failure falls into general_handler.
379: ;-----------------------------------------------------------------------;
380:
381: cmp ulCharInc,8 ;8 wide?
382: jnz short @F ;no
383: cmp fDrawFlags,5 ;exactly 5 = tops aligned, no overlap, fixed pitch (MasterTextTypeTable row 5)?
384: jnz short @F ;no
385: cmp prclOpaque,0 ;opaque? (non-null opaque rect pointer)
386: jz short @F ;no
387: test [ebx].xLeft,111b ;byte aligned? (low 3 bits of text rect left edge clear)
388: jz special_8_wide_aligned_opaque ;yes, special-case
389: @@:
390:
391: general_handler:
392: ;records the temp buffer's screen position, then dispatches on text type; reads the text rect through ebx
393: mov esi,ppdev ;esi -> PDEV
394: mov eax,[ebx].yTop
395: mov ulTopScan,eax ;Y screen coordinate of top edge of temp buf
396: mov eax,[ebx].xLeft
397: and eax,not 7 ;round the left edge down to a multiple of 8 pixels
398: mov ulTempLeft,eax ;X screen coordinate of left edge of temp buf
399:
400: mov eax,fDrawFlags ;eax = index into MasterTextTypeTable
401:
402: mov edx,[ebx].yBottom
403: mov ulBottomScan,edx ;bottom scan of text area
404: ;no range check on fDrawFlags here; the table has 8 entries (indexes 0-7)
405: jmp MasterTextTypeTable[eax*4]
406:
407: ;-----------------------------------------------------------------------;
408: ; Entry point for fixed-pitch | tops and bottoms aligned | no overlap.
409: ; Sets up to draw first glyph.
410: ;-----------------------------------------------------------------------;
411: align 4
412: draw_f_tb_no_to_temp_start:
413: mov ebx,pGlyphPos ;point to the first glyph to draw
414: mov esi,[ebx].gp_pgdf ;point to glyph def
415: ;fixed pitch: X of later glyphs is derived from ulGlyphX + ulCharInc, so save it now
416: mov edi,[ebx].gp_x ;dest X coordinate
417: sub edi,ulTempLeft ;adjust relative to the left of the
418: ; temp buffer (we assume the text is
419: ; right at the top of the text rect
420: ; and hence the buffer)
421: mov ulGlyphX,edi ;remember where this glyph started
422: mov esi,[esi].gdf_pgb ;point to glyph bits
423: mov pGlyphLoop,offset draw_f_tb_no_to_temp_loop
424: ;draw additional characters with this
425: ; loop
426: jmp short draw_to_temp_start_entry ;continue with edi = X in temp buffer, esi -> glyph bits
427:
428: ;-----------------------------------------------------------------------;
429: ; Entry point for non-fixed-pitch | tops and bottoms aligned | no overlap.
430: ; Sets up to draw first glyph.
431: ;-----------------------------------------------------------------------;
432: align 4
433: draw_nf_tb_no_to_temp_start:
434: mov ebx,pGlyphPos ;point to the first glyph to draw
435: mov esi,[ebx].gp_pgdf ;point to glyph def
436:
437: mov edi,[ebx].gp_x ;dest X coordinate
438: sub edi,ulTempLeft ;adjust relative to the left of the
439: ; temp buffer
440: mov esi,[esi].gdf_pgb ;point to glyph bits
441: mov pGlyphLoop,offset draw_nf_tb_no_to_temp_loop
442: ;draw additional characters with this
443: ; loop
444: draw_to_temp_start_entry: ;in: edi = glyph X relative to temp buffer, esi -> glyph bits
445: add edi,[esi].gb_x ;adjust to position of upper left glyph
446: ; corner in dest
447: ;BUGBUG add or sub?
448: mov ecx,edi ;pixel X coordinate in temp buffer
449: shr edi,3 ;byte offset of first column = dest
450: ; offset of upper left of glyph in temp
451: ; buffer
452: add edi,pTempBuffer ;initial dest byte in temp buffer
453:
454: and ecx,111b ;bit alignment of upper left in temp
455:
456: ;calculate scan-to-scan glyph width
457: mov ebx,[esi].gb_cx ;glyph width in pixels
458:
459: lea eax,[ebx+ecx+7] ;pixels + bit alignment, rounded up to whole bytes
460: shr eax,3 ;# of dest bytes per scan
461:
462: add ebx,7
463: shr ebx,3 ;# of source bytes per scan
464:
465: mov edx,ulBufDelta ;width of destination buffer in bytes
466:
467: cmp eax,4 ;do we have special case code for this
468: ; dest width?
469: ja short @F ;no, handle as general case
470: ;yes, handle as special case
471: cmp ebx,eax ;carry if more dest than source bytes
472: ; (last source byte not needed)
473: rcl eax,1 ;factor last source byte status in
474: cmp cl,1 ;carry if aligned (cl = 0)
475: rcl eax,1 ;factor in alignment (aligned or not)
476: mov ebx,[esi].gb_cy ;# of scans in glyph
477: add esi,gb_aj ;point to the first glyph byte
478: ;at dispatch: eax = width*4 + noLast*2 + aligned, esi -> glyph bytes, edi -> dest byte, ebx = # scans, cl = bit alignment, edx = ulBufDelta
479: jmp MovInitialTableNarrow[eax*4]
480: ;branch to draw the first glyph; never
481: ; need to OR first glyph, because
482: ; there's nothing there yet
483:
484: align 4
485: @@: ;too wide to special case
486: mov ulWidthInBytes,eax ;# of bytes across dest
487: cmp ebx,eax ;carry if more dest than source bytes
488: ; (last source byte not needed)
489: mov eax,0 ;clear index; MOV leaves CF from the CMP intact for the RCL
490: rcl eax,1 ;factor last source byte status in
491: cmp cl,1 ;carry if aligned (cl = 0)
492: rcl eax,1 ;factor in alignment (aligned or not)
493: ;ebx still holds (gb_cx+7)>>3 from above; the next three instructions recompute the same value
494: mov ebx,[esi].gb_cx ;glyph width in pixels
495: add ebx,7
496: shr ebx,3 ;glyph width in bytes
497: mov ulGlyDelta,ebx ;source bytes per glyph scan
498:
499: mov ebx,[esi].gb_cy ;# of scans in glyph
500: add esi,gb_aj ;point to the first glyph byte
501: ;at dispatch: eax = noLast*2 + aligned; other registers as for the narrow case
502: jmp MovInitialTableWide[eax*4]
503: ;branch to draw the first glyph; never
504: ; need to OR first glyph, because
505: ; there's nothing there yet
506:
507: ;-----------------------------------------------------------------------;
508: ; Entry point for fixed-pitch | tops and bottoms not aligned | overlap.
509: ; Sets up to draw first glyph.
510: ;-----------------------------------------------------------------------;
511: align 4
512: draw_f_ntb_o_to_temp_start:
513: mov ebx,pGlyphPos ;point to the first glyph to draw
514: mov pGlyphLoop,offset draw_f_ntb_o_to_temp_loop
515: ;draw additional characters with this
516: ; loop
517: mov edi,[ebx].gp_x ;dest X coordinate
518: mov esi,[ebx].gp_pgdf ;point to glyph def
519: sub edi,ulTempLeft ;adjust relative to the left of the
520: ; temp buffer
521: mov ulGlyphX,edi ;remember where this glyph started
522: mov esi,[esi].gdf_pgb ;point to glyph bits
523: add edi,[esi].gb_x ;adjust to position of upper left glyph
524: ; corner in dest
525: mov ecx,edi ;pixel X coordinate in temp buffer
526: shr edi,3 ;byte offset of first column of glyph
527: ; within a scan of the temp buffer (the
528: ; Y offset is added at the shared entry)
529: jmp short draw_to_temp_start_entry2 ;ebx must still point to the GLYPHPOS there (gp_y is read)
530:
531: ;-----------------------------------------------------------------------;
532: ; Entry point for non-fixed-pitch | tops and bottoms not aligned | overlap.
533: ; Sets up to draw first glyph.
534: ;-----------------------------------------------------------------------;
535: align 4
536: draw_nf_ntb_o_to_temp_start:
537: mov ebx,pGlyphPos ;point to the first glyph to draw
538: mov pGlyphLoop,offset draw_nf_ntb_o_to_temp_loop
539: ;draw additional characters with this
540: ; loop
541: mov edi,[ebx].gp_x ;dest X coordinate
542: mov esi,[ebx].gp_pgdf ;point to glyph def
543: sub edi,ulTempLeft ;adjust relative to the left of the
544: ; temp buffer
545: mov esi,[esi].gdf_pgb ;point to glyph bits
546: add edi,[esi].gb_x ;adjust to position of upper left glyph
547: ; corner in dest
548: ;BUGBUG add or sub?
549: mov ecx,edi ;pixel X coordinate in temp buffer
550: shr edi,3 ;byte offset of first column of glyph
551: ; within a scan of the temp buffer (the
552: ; Y offset is added just below)
553: draw_to_temp_start_entry2: ;in: ebx -> GLYPHPOS, esi -> glyph bits, edi = byte X offset, ecx = pixel X
554: mov eax,[ebx].gp_y ;dest origin Y coordinate
555: sub eax,ulTopScan ;coord of glyph origin in temp buffer
556: mov ulYOrigin,eax ;remember the Y origin of all glyphs
557: ; (necessary because glyph positions
558: ; after first aren't set for fixed-
559: ; pitch strings)
560: add eax,[esi].gb_y ;adjust to position of upper left glyph
561: ; corner in dest
562: mul ulBufDelta ;offset in buffer of top glyph scan (MUL also overwrites edx)
563: add eax,pTempBuffer ;initial dest byte
564: add edi,eax ;edi -> dest byte of glyph's upper left in temp buffer
565:
566: and ecx,111b ;bit alignment of upper left in temp
567:
568: ;calculate scan-to-scan glyph width
569: mov ebx,[esi].gb_cx ;glyph width in pixels
570:
571: lea eax,[ebx+ecx+7] ;pixels + bit alignment, rounded up to whole bytes
572: shr eax,3 ;# of dest bytes per scan
573:
574: add ebx,7
575: shr ebx,3 ;# of source bytes per scan
576:
577: mov edx,ulBufDelta ;width of destination buffer in bytes
578:
579: cmp eax,4 ;do we have special case code for this
580: ; dest width?
581: ja short @F ;no, handle as general case
582: ;yes, handle as special case
583: cmp ebx,eax ;carry if more dest than source bytes
584: ; (last source byte not needed)
585: rcl eax,1 ;factor last source byte status in
586: cmp cl,1 ;carry if aligned (cl = 0)
587: rcl eax,1 ;factor in alignment (aligned or not)
588: mov ebx,[esi].gb_cy ;# of scans in glyph
589: add esi,gb_aj ;point to the first glyph byte
590: ;at dispatch: eax = width*4 + noLast*2 + aligned, esi -> glyph bytes, edi -> dest byte, ebx = # scans, cl = bit alignment, edx = ulBufDelta
591: jmp OrAllTableNarrow[eax*4] ;branch to draw the first glyph; OR all
592: ; glyphs, because text may overlap
593:
594: align 4
595: @@: ;too wide to special case
596: mov ulWidthInBytes,eax ;# of bytes across dest
597: cmp ebx,eax ;carry if more dest than source bytes
598: ; (last source byte not needed)
599: mov eax,0 ;clear index; MOV leaves CF from the CMP intact for the RCL
600: rcl eax,1 ;factor last source byte status in
601: cmp cl,1 ;carry if aligned (cl = 0)
602: rcl eax,1 ;factor in alignment (aligned or not)
603: ;ebx still holds (gb_cx+7)>>3 from above; the next three instructions recompute the same value
604: mov ebx,[esi].gb_cx ;glyph width in pixels
605: add ebx,7
606: shr ebx,3 ;glyph width in bytes
607: mov ulGlyDelta,ebx ;source bytes per glyph scan
608:
609: mov ebx,[esi].gb_cy ;# of scans in glyph
610: add esi,gb_aj ;point to the first glyph byte
611: ;at dispatch: eax = noLast*2 + aligned; other registers as for the narrow case
612: jmp OrAllTableWide[eax*4] ;branch to draw the first glyph; OR all
613: ; glyphs, because text may overlap
614:
615: ;-----------------------------------------------------------------------;
616: ; Loop to draw all fixed-pitch | tops and bottoms aligned | no overlap
617: ; glyphs after first.
618: ;-----------------------------------------------------------------------;
619: align 4
620: draw_f_tb_no_to_temp_loop:
621: dec ulGlyphCount ;any more glyphs to draw? (counts off the glyph just drawn)
622: jz draw_to_screen ;no, done
623: mov ebx,pGlyphPos
624: add ebx,size GLYPHPOS ;point to the next glyph (the one
625: mov pGlyphPos,ebx ; we're going to draw this time)
626: mov esi,[ebx].gp_pgdf ;point to glyph def
627: ;fixed pitch: X comes from the previous glyph's X plus ulCharInc, not from gp_x
628: mov edi,ulGlyphX ;last glyph's dest X start in temp buf
629: add edi,ulCharInc ;this glyph's dest X start in temp buf
630: mov ulGlyphX,edi ;remember for next glyph
631: mov esi,[esi].gdf_pgb ;point to glyph bits
632: jmp short draw_to_temp_loop_entry ;continue with edi = X in temp buffer, esi -> glyph bits
633:
634: ;-----------------------------------------------------------------------;
635: ; Loop to draw all non-fixed-pitch | tops and bottoms aligned | no overlap
636: ; glyphs after first.
637: ;-----------------------------------------------------------------------;
638: align 4
639: draw_nf_tb_no_to_temp_loop:
640: dec ulGlyphCount ;any more glyphs to draw? (counts off the glyph just drawn)
641: jz draw_to_screen ;no, done
642: mov ebx,pGlyphPos
643: add ebx,size GLYPHPOS ;point to the next glyph (the one we're
644: mov pGlyphPos,ebx ; going to draw this time)
645: mov esi,[ebx].gp_pgdf ;point to glyph def
646: mov edi,[ebx].gp_x ;dest X coordinate
647: mov esi,[esi].gdf_pgb ;point to glyph bits
648: sub edi,ulTempLeft ;adjust relative to the left edge of
649: ; the temp buffer
650:
651: draw_to_temp_loop_entry: ;in: edi = glyph X relative to temp buffer, esi -> glyph bits
652: add edi,[esi].gb_x ;adjust to position of upper left glyph
653: ; corner in dest
654: mov ecx,edi ;pixel X coordinate in temp buffer
655: shr edi,3 ;byte offset of first column = dest
656: ; offset of upper left of glyph in temp
657: ; buffer
658: add edi,pTempBuffer ;initial dest byte
659:
660: and ecx,111b ;bit alignment of upper left in temp
661:
662: ;calculate scan-to-scan glyph width
663: mov ebx,[esi].gb_cx ;glyph width in pixels
664:
665: lea eax,[ebx+ecx+7] ;pixels + bit alignment, rounded up to whole bytes
666: shr eax,3 ;# of dest bytes to copy to per scan
667:
668: add ebx,7
669: shr ebx,3 ;# of source bytes to copy from per
670: ; scan
671: mov edx,ulBufDelta ;width of destination buffer in bytes
672:
673: cmp eax,4 ;do we have special case code for this
674: ; dest width?
675: ja short @F ;no, handle as general case
676: ;yes, handle as special case
677: cmp ebx,eax ;carry if more dest than source bytes
678: ; (last source byte not needed)
679: rcl eax,1 ;factor last source byte status in
680: cmp cl,1 ;carry if aligned (cl = 0)
681: rcl eax,1 ;factor in alignment (aligned or not)
682: mov ebx,[esi].gb_cy ;# of scans in glyph
683: add esi,gb_aj ;point to the first glyph byte
684: ;at dispatch: eax = width*4 + noLast*2 + aligned, esi -> glyph bytes, edi -> dest byte, ebx = # scans, cl = bit alignment, edx = ulBufDelta
685: jmp OrInitialTableNarrow[eax*4] ;branch to draw the next glyph;
686: ; need to OR the 1st byte if
687: ; non-aligned to avoid overwriting
688: ; what's already there
689: align 4
690: @@: ;too wide to special case
691: mov ulWidthInBytes,eax ;# of bytes across dest
692: cmp ebx,eax ;carry if more dest than source bytes
693: ; (last source byte not needed)
694: mov eax,0 ;clear index; MOV leaves CF from the CMP intact for the RCL
695: rcl eax,1 ;factor last source byte status in
696: cmp cl,1 ;carry if aligned (cl = 0)
697: rcl eax,1 ;factor in alignment (aligned or not)
698: ;ebx still holds (gb_cx+7)>>3 from above; the next three instructions recompute the same value
699: mov ebx,[esi].gb_cx ;glyph width in pixels
700: add ebx,7
701: shr ebx,3 ;glyph width in bytes
702: mov ulGlyDelta,ebx ;source bytes per glyph scan
703:
704: mov ebx,[esi].gb_cy ;# of scans in glyph
705: add esi,gb_aj ;point to the first glyph byte
706: ;at dispatch: eax = noLast*2 + aligned; other registers as for the narrow case
707: jmp OrInitialTableWide[eax*4] ;branch to draw the next glyph;
708: ; need to OR the 1st byte if
709: ; non-aligned to avoid overwriting
710: ; what's already there
711:
712: ;-----------------------------------------------------------------------;
713: ; Loop to draw all fixed-pitch | tops and bottoms not aligned | overlap
714: ; glyphs after first.
715: ;-----------------------------------------------------------------------;
716: align 4
717: draw_f_ntb_o_to_temp_loop:
718: dec ulGlyphCount ;any more glyphs to draw? (counts off the glyph just drawn)
719: jz draw_to_screen ;no, done
720: mov ebx,pGlyphPos
721: add ebx,size GLYPHPOS ;point to the next glyph (the one we're
722: mov pGlyphPos,ebx ; going to draw this time)
723: ;fixed pitch: X comes from the previous glyph's X plus ulCharInc, not from gp_x
724: mov esi,[ebx].gp_pgdf ;point to glyph def
725: mov edi,ulGlyphX ;last glyph's dest X start in temp buf
726: add edi,ulCharInc ;this glyph's dest X start in temp buf
727: mov ulGlyphX,edi ;remember for next glyph
728: mov esi,[esi].gdf_pgb ;point to glyph bits
729:
730: jmp short draw_to_temp_loop_entry2 ;continue with edi = X in temp buffer, esi -> glyph bits
731:
732: ;-----------------------------------------------------------------------;
733: ; Loop to draw all non-fixed-pitch | tops and bottoms not aligned | overlap
734: ; glyphs after first.
735: ;-----------------------------------------------------------------------;
736: align 4
737: draw_nf_ntb_o_to_temp_loop:
738: dec ulGlyphCount ;any more glyphs to draw? (counts off the glyph just drawn)
739: jz draw_to_screen ;no, done
740: mov ebx,pGlyphPos
741: add ebx,size GLYPHPOS ;point to the next glyph (the one we're
742: mov pGlyphPos,ebx ; going to draw this time)
743:
744: mov esi,[ebx].gp_pgdf ;point to glyph def
745: mov edi,[ebx].gp_x ;dest X coordinate
746: mov esi,[esi].gdf_pgb ;point to glyph bits
747: sub edi,ulTempLeft ;adjust relative to the left edge of
748: ; the temp buffer
749: draw_to_temp_loop_entry2: ;in: edi = glyph X relative to temp buffer, esi -> glyph bits
750: add edi,[esi].gb_x ;adjust to position of upper left glyph
751: ; corner in dest
752: mov ecx,edi ;pixel X coordinate in temp buffer
753: shr edi,3 ;byte offset of first column = dest
754: ; offset of upper left of glyph within
755: ; a scan of the temp buffer
756: mov eax,ulYOrigin ;dest Y coordinate (saved while setting up the first glyph)
757:
758: add eax,[esi].gb_y ;adjust to position of upper left glyph
759: ; corner in dest
760: mul ulBufDelta ;offset in buffer of top glyph scan (MUL also overwrites edx)
761: add eax,pTempBuffer ;initial dest byte
762: add edi,eax ;edi -> dest byte of glyph's upper left in temp buffer
763:
764: and ecx,111b ;bit alignment of upper left in temp
765:
766: ;calculate scan-to-scan glyph width
767: mov ebx,[esi].gb_cx ;glyph width in pixels
768:
769: lea eax,[ebx+ecx+7] ;pixels + bit alignment, rounded up to whole bytes
770: shr eax,3 ;# of dest bytes to copy to per scan
771:
772: add ebx,7
773: shr ebx,3 ;# of source bytes to copy from per
774: ; scan
775: mov edx,ulBufDelta ;width of destination buffer in bytes
776:
777: cmp eax,4 ;do we have special case code for this
778: ; dest width?
779: ja short @F ;no, handle as general case
780: ;yes, handle as special case
781: cmp ebx,eax ;carry if more dest than source bytes
782: ; (last source byte not needed)
783: rcl eax,1 ;factor last source byte status in
784: cmp cl,1 ;carry if aligned (cl = 0)
785: rcl eax,1 ;factor in alignment (aligned or not)
786: mov ebx,[esi].gb_cy ;# of scans in glyph
787: add esi,gb_aj ;point to the first glyph byte
788: ;at dispatch: eax = width*4 + noLast*2 + aligned, esi -> glyph bytes, edi -> dest byte, ebx = # scans, cl = bit alignment, edx = ulBufDelta
789: jmp OrAllTableNarrow[eax*4] ;branch to draw the next glyph
790:
791: align 4
792: @@: ;too wide to special case
793: mov ulWidthInBytes,eax ;# of bytes across dest
794: cmp ebx,eax ;carry if more dest than source bytes
795: ; (last source byte not needed)
796: mov eax,0 ;clear index; MOV leaves CF from the CMP intact for the RCL
797: rcl eax,1 ;factor last source byte status in
798: cmp cl,1 ;carry if aligned (cl = 0)
799: rcl eax,1 ;factor in alignment (aligned or not)
800: ;ebx still holds (gb_cx+7)>>3 from above; the next three instructions recompute the same value
801: mov ebx,[esi].gb_cx ;glyph width in pixels
802: add ebx,7
803: shr ebx,3 ;glyph width in bytes
804: mov ulGlyDelta,ebx ;source bytes per glyph scan
805:
806: mov ebx,[esi].gb_cy ;# of scans in glyph
807: add esi,gb_aj ;point to the first glyph byte
808: ;at dispatch: eax = noLast*2 + aligned; other registers as for the narrow case
809: jmp OrAllTableWide[eax*4] ;branch to draw the next glyph
810:
811: ;-----------------------------------------------------------------------;
812: ; Routines to draw all scans of a single glyph into the temp buffer,
813: ; optimized for the following cases:
814: ;
815: ; 1 to 4 byte-wide destination rectangles for each of:
816: ; No rotation needed
817: ; Rotation needed, same # of source as dest bytes needed
818: ; Rotation needed, one less source than dest bytes needed
819: ;
820: ; Additionally, each of the three cases is handled for destinations 5 or
821: ; more bytes wide by a general (N-wide) routine.
822: ;
823: ; If rotation is needed, there are three sorts of routines:
824: ;
825: ; 1) The leftmost byte is MOVed, to initialize the byte. Succeeding bytes are
826: ; MOVed. This is generally used for the leftmost glyph of a string.
827: ; 2) The leftmost byte is ORed into the existing byte. Succeeding bytes are
828: ; MOVed. This is generally used after the leftmost glyph, because this may
829: ; not be the first data written to that byte.
830: ; 3) All bytes are ORed. This is for drawing when characters might overlap.
831: ;
832: ; If rotation is not needed, there are two sorts of routines:
833: ;
834: ; 1) The leftmost byte is MOVed, to initialize the byte. Succeeding bytes are
835: ; MOVed. This is generally used for the leftmost glyph of a string.
836: ; 2) All bytes are ORed. This is for drawing when characters might overlap.
837: ;
838: ; On entry:
839: ; EBX = # of scans to copy
840: ; CL = right rotation
841: ; EDX = ulBufDelta = width per scan of destination buffer, in bytes
842: ; ESI = pointer to first glyph byte
843: ; EDI = pointer to first dest buffer byte
844: ; DF = cleared
845: ; ulGlyDelta = width per scan of source glyph, in bytes (wide case only)
846: ; ulWidthInBytes = width of glyph, in bytes (required only for 5 and
847: ; wider cases)
848: ;
849: ; On exit:
850: ; Any or all of EAX, EBX, ECX, EDX, ESI, and EDI may be trashed.
851:
852: ;-----------------------------------------------------------------------;
853: ; OR the single dest byte, 1 byte wide dest, rotated (shared by the OR-first and OR-all, need-last and no-last entry labels).
854: ;-----------------------------------------------------------------------;
855: align 4
856: or_all_1_wide_rotated_need_last:
857: or_all_1_wide_rotated_no_last:
858: or_first_1_wide_rotated_need_last:
859: or_first_1_wide_rotated_no_last:
860: SET_UP_UNROLL_AND_BRANCH ebx,eax,or_first_1_wide_rotated_table, \
861: LOOP_UNROLL_SHIFT
862: ;NOTE(review): the unroll macros are defined outside this view; presumably they enter the unrolled loop below based on the scan count in EBX
863: UNROLL_LOOP_ENTRY_TABLE or_first_1_wide_rotated_table,OF1WR, \
864: LOOP_UNROLL_COUNT
865: ;one scan: load a glyph byte, shift it right by CL, OR it into the dest byte
866: MOR_FIRST_1_WIDE_ROTATED macro ENTRY_LABEL,ENTRY_INDEX
867: &ENTRY_LABEL&ENTRY_INDEX&:
868: mov ch,[esi] ;CH = glyph byte (CL holds the right shift count)
869: inc esi ;next source scan (source is 1 byte per scan)
870: shr ch,cl ;move glyph bits to their dest bit position
871: or [edi],ch ;merge with what is already in the dest byte
872: add edi,edx ;next dest scan (EDX = ulBufDelta)
873: endm ;-----------------------------------;
874:
875: align 4
876: or_first_1_wide_rotated_loop:
877: UNROLL_LOOP MOR_FIRST_1_WIDE_ROTATED,OF1WR,LOOP_UNROLL_COUNT
878: dec ebx ;another pass through the unrolled body?
879: jnz or_first_1_wide_rotated_loop
880: jmp pGlyphLoop ;glyph done; continue via pGlyphLoop (defined outside this view)
881:
882: ;-----------------------------------------------------------------------;
883: ; MOV first byte, 1 byte wide dest, rotated (need-last and no-last entry labels share this code).
884: ;-----------------------------------------------------------------------;
885: align 4
886: mov_first_1_wide_rotated_need_last:
887: mov_first_1_wide_rotated_no_last:
888: SET_UP_UNROLL_AND_BRANCH ebx,eax,mov_first_1_wide_rotated_table, \
889: LOOP_UNROLL_SHIFT
890: ;NOTE(review): the unroll macros are defined outside this view; presumably they enter the unrolled loop below based on the scan count in EBX
891: UNROLL_LOOP_ENTRY_TABLE mov_first_1_wide_rotated_table,MF1WR, \
892: LOOP_UNROLL_COUNT
893: ;one scan: load a glyph byte, shift it right by CL, store it over the dest byte
894: MMOV_FIRST_1_WIDE_ROTATED macro ENTRY_LABEL,ENTRY_INDEX
895: &ENTRY_LABEL&ENTRY_INDEX&:
896: mov ch,[esi] ;CH = glyph byte (CL holds the right shift count)
897: inc esi ;next source scan (source is 1 byte per scan)
898: shr ch,cl ;move glyph bits to their dest bit position
899: mov [edi],ch ;overwrite the dest byte, initializing it
900: add edi,edx ;next dest scan (EDX = ulBufDelta)
901: endm ;-----------------------------------;
902:
903: align 4
904: mov_first_1_wide_rotated_loop:
905: UNROLL_LOOP MMOV_FIRST_1_WIDE_ROTATED,MF1WR,LOOP_UNROLL_COUNT
906: dec ebx ;another pass through the unrolled body?
907: jnz mov_first_1_wide_rotated_loop
908: jmp pGlyphLoop ;glyph done; continue via pGlyphLoop (defined outside this view)
909:
910: ;-----------------------------------------------------------------------;
911: ; MOV first byte, 1 byte wide dest, unrotated (straight byte copy, one byte per scan).
912: ;-----------------------------------------------------------------------;
913: align 4
914: mov_first_1_wide_unrotated:
915: ;NOTE(review): the unroll macros are defined outside this view; presumably they enter the unrolled loop below based on the scan count in EBX
916: SET_UP_UNROLL_AND_BRANCH ebx,eax,mov_first_1_wide_unrotated_table, \
917: LOOP_UNROLL_SHIFT
918:
919: UNROLL_LOOP_ENTRY_TABLE mov_first_1_wide_unrotated_table,MF1WU, \
920: LOOP_UNROLL_COUNT
921: ;one scan: copy a glyph byte to the dest byte unchanged
922: MMOV_FIRST_1_WIDE_UNROTATED macro ENTRY_LABEL,ENTRY_INDEX
923: &ENTRY_LABEL&ENTRY_INDEX&:
924: mov al,[esi] ;AL = glyph byte
925: inc esi ;next source scan
926: mov [edi],al ;overwrite the dest byte
927: add edi,edx ;next dest scan (EDX = ulBufDelta)
928: endm ;-----------------------------------;
929:
930: align 4
931: mov_first_1_wide_unrotated_loop:
932: UNROLL_LOOP MMOV_FIRST_1_WIDE_UNROTATED,MF1WU,LOOP_UNROLL_COUNT
933: dec ebx ;another pass through the unrolled body?
934: jnz mov_first_1_wide_unrotated_loop
935: jmp pGlyphLoop ;glyph done; continue via pGlyphLoop (defined outside this view)
936:
937: ;-----------------------------------------------------------------------;
938: ; OR all bytes, 1 byte wide dest, unrotated (one byte ORed per scan).
939: ;-----------------------------------------------------------------------;
940: align 4
941: or_all_1_wide_unrotated:
942: SET_UP_UNROLL_AND_BRANCH ebx,eax,or_all_1_wide_unrotated_table, \
943: LOOP_UNROLL_SHIFT
944: ;NOTE(review): the unroll macros are defined outside this view; presumably they enter the unrolled loop below based on the scan count in EBX
945: UNROLL_LOOP_ENTRY_TABLE or_all_1_wide_unrotated_table,OA1WU, \
946: LOOP_UNROLL_COUNT
947: ;one scan: OR a glyph byte into the dest byte unchanged
948: MOR_ALL_1_WIDE_UNROTATED macro ENTRY_LABEL,ENTRY_INDEX
949: &ENTRY_LABEL&ENTRY_INDEX&:
950: mov al,[esi] ;AL = glyph byte
951: inc esi ;next source scan
952: or [edi],al ;merge with what is already in the dest byte
953: add edi,edx ;next dest scan (EDX = ulBufDelta)
954: endm ;-----------------------------------;
955:
956: align 4
957: or_all_1_wide_unrotated_loop:
958: UNROLL_LOOP MOR_ALL_1_WIDE_UNROTATED,OA1WU,LOOP_UNROLL_COUNT
959: dec ebx ;another pass through the unrolled body?
960: jnz or_all_1_wide_unrotated_loop
961: jmp pGlyphLoop ;glyph done; continue via pGlyphLoop (defined outside this view)
962:
963: ;-----------------------------------------------------------------------;
964: ; OR first byte, 2 bytes wide dest, rotated, need final source byte (2 source bytes per scan).
965: ;-----------------------------------------------------------------------;
966: align 4
967: or_first_2_wide_rotated_need_last:
968: SET_UP_UNROLL_AND_BRANCH ebx,eax,or_first_2_wide_rotated_need_table, \
969: LOOP_UNROLL_SHIFT
970: ;NOTE(review): the unroll macros are defined outside this view; presumably they enter the unrolled loop below based on the scan count in EBX
971: UNROLL_LOOP_ENTRY_TABLE or_first_2_wide_rotated_need_table,OF2WRN, \
972: LOOP_UNROLL_COUNT
973: ;one scan: rotate a 2-byte glyph word right by CL; OR the left byte, MOV the right byte
974: MOR_FIRST_2_WIDE_ROTATED_NEED macro ENTRY_LABEL,ENTRY_INDEX
975: &ENTRY_LABEL&ENTRY_INDEX&:
976: mov ax,[esi] ;AL = 1st (left) glyph byte, AH = 2nd
977: add esi,2 ;next source scan
978: ror ax,cl ;AH = 2nd byte >> CL with the 1st byte's low bits on top; AL = 1st byte >> CL (plus the 2nd byte's low CL bits, presumably zero here - TODO confirm)
979: or [edi],al ;left dest byte may already hold data, so OR
980: mov [edi+1],ah ;right dest byte is simply stored
981: add edi,edx ;next dest scan (EDX = ulBufDelta)
982: endm ;-----------------------------------;
983:
984: align 4
985: or_first_2_wide_rotated_need_loop:
986: UNROLL_LOOP MOR_FIRST_2_WIDE_ROTATED_NEED,OF2WRN,LOOP_UNROLL_COUNT
987: dec ebx ;another pass through the unrolled body?
988: jnz or_first_2_wide_rotated_need_loop
989: jmp pGlyphLoop ;glyph done; continue via pGlyphLoop (defined outside this view)
990:
991: ;-----------------------------------------------------------------------;
992: ; OR all bytes, 2 bytes wide dest, rotated, need final source byte (2 source bytes per scan).
993: ;-----------------------------------------------------------------------;
994: align 4
995: or_all_2_wide_rotated_need_last:
996: SET_UP_UNROLL_AND_BRANCH ebx,eax,or_all_2_wide_rotated_need_table, \
997: LOOP_UNROLL_SHIFT
998: ;NOTE(review): the unroll macros are defined outside this view; presumably they enter the unrolled loop below based on the scan count in EBX
999: UNROLL_LOOP_ENTRY_TABLE or_all_2_wide_rotated_need_table,OA2WRN, \
1000: LOOP_UNROLL_COUNT
1001: ;one scan: rotate a 2-byte glyph word right by CL and OR both bytes into the dest
1002: MOR_ALL_2_WIDE_ROTATED_NEED macro ENTRY_LABEL,ENTRY_INDEX
1003: &ENTRY_LABEL&ENTRY_INDEX&:
1004: mov ax,[esi] ;AL = 1st (left) glyph byte, AH = 2nd
1005: add esi,2 ;next source scan
1006: ror ax,cl ;AL = left dest byte, AH = right dest byte (see rotation in word form)
1007: or [edi],ax ;word OR: AL goes to [edi], AH to [edi+1]
1008: add edi,edx ;next dest scan (EDX = ulBufDelta)
1009: endm ;-----------------------------------;
1010:
1011: align 4
1012: or_all_2_wide_rotated_need_loop:
1013: UNROLL_LOOP MOR_ALL_2_WIDE_ROTATED_NEED,OA2WRN,LOOP_UNROLL_COUNT
1014: dec ebx ;another pass through the unrolled body?
1015: jnz or_all_2_wide_rotated_need_loop
1016: jmp pGlyphLoop ;glyph done; continue via pGlyphLoop (defined outside this view)
1017:
1018: ;-----------------------------------------------------------------------;
1019: ; MOV first byte, 2 bytes wide dest, rotated, need final source byte (2 source bytes per scan).
1020: ;-----------------------------------------------------------------------;
1021: align 4
1022: mov_first_2_wide_rotated_need_last:
1023: SET_UP_UNROLL_AND_BRANCH ebx,eax,mov_first_2_wide_rotated_need_table, \
1024: LOOP_UNROLL_SHIFT
1025: ;NOTE(review): the unroll macros are defined outside this view; presumably they enter the unrolled loop below based on the scan count in EBX
1026: UNROLL_LOOP_ENTRY_TABLE mov_first_2_wide_rotated_need_table,MF2WRN, \
1027: LOOP_UNROLL_COUNT
1028: ;one scan: rotate a 2-byte glyph word right by CL and store both bytes
1029: MMOV_FIRST_2_WIDE_ROTATED_NEED macro ENTRY_LABEL,ENTRY_INDEX
1030: &ENTRY_LABEL&ENTRY_INDEX&:
1031: mov ax,[esi] ;AL = 1st (left) glyph byte, AH = 2nd
1032: add esi,2 ;next source scan
1033: ror ax,cl ;AL = left dest byte, AH = right dest byte
1034: mov [edi],ax ;word store: AL goes to [edi], AH to [edi+1]
1035: add edi,edx ;next dest scan (EDX = ulBufDelta)
1036: endm ;-----------------------------------;
1037:
1038: align 4
1039: mov_first_2_wide_rotated_need_loop:
1040: UNROLL_LOOP MMOV_FIRST_2_WIDE_ROTATED_NEED,MF2WRN,LOOP_UNROLL_COUNT
1041: dec ebx ;another pass through the unrolled body?
1042: jnz mov_first_2_wide_rotated_need_loop
1043: jmp pGlyphLoop ;glyph done; continue via pGlyphLoop (defined outside this view)
1044:
1045: ;-----------------------------------------------------------------------;
1046: ; OR first byte, 2 bytes wide dest, rotated, don't need final source byte (1 source byte spills into 2 dest bytes).
1047: ;-----------------------------------------------------------------------;
1048: align 4
1049: or_first_2_wide_rotated_no_last:
1050: SET_UP_UNROLL_AND_BRANCH ebx,eax,or_first_2_wide_rotated_table, \
1051: LOOP_UNROLL_SHIFT
1052: ;NOTE(review): the unroll macros are defined outside this view; presumably they enter the unrolled loop below based on the scan count in EBX
1053: UNROLL_LOOP_ENTRY_TABLE or_first_2_wide_rotated_table,OF2WR, \
1054: LOOP_UNROLL_COUNT
1055: ;one scan: shift one glyph byte right by CL across AH:AL; OR the left byte, MOV the right byte
1056: MOR_FIRST_2_WIDE_ROTATED macro ENTRY_LABEL,ENTRY_INDEX
1057: &ENTRY_LABEL&ENTRY_INDEX&:
1058: sub eax,eax ;clear AL so only glyph bits shift into it
1059: mov ah,[esi] ;AH = the single glyph byte for this scan
1060: inc esi ;next source scan
1061: shr eax,cl ;AH = left dest byte, AL = bits shifted out to the right
1062: or [edi],ah ;left dest byte may already hold data, so OR
1063: mov [edi+1],al ;right dest byte is simply stored
1064: add edi,edx ;next dest scan (EDX = ulBufDelta)
1065: endm ;-----------------------------------;
1066:
1067: align 4
1068: or_first_2_wide_rotated_loop:
1069: UNROLL_LOOP MOR_FIRST_2_WIDE_ROTATED,OF2WR,LOOP_UNROLL_COUNT
1070: dec ebx ;another pass through the unrolled body?
1071: jnz or_first_2_wide_rotated_loop
1072: jmp pGlyphLoop ;glyph done; continue via pGlyphLoop (defined outside this view)
1073:
1074: ;-----------------------------------------------------------------------;
1075: ; OR all bytes, 2 bytes wide dest, rotated, don't need final source byte (1 source byte spills into 2 dest bytes).
1076: ;-----------------------------------------------------------------------;
1077: align 4
1078: or_all_2_wide_rotated_no_last:
1079: SET_UP_UNROLL_AND_BRANCH ebx,eax,or_all_2_wide_rotated_table, \
1080: LOOP_UNROLL_SHIFT
1081: ;NOTE(review): the unroll macros are defined outside this view; presumably they enter the unrolled loop below based on the scan count in EBX
1082: UNROLL_LOOP_ENTRY_TABLE or_all_2_wide_rotated_table,OA2WR, \
1083: LOOP_UNROLL_COUNT
1084: ;one scan: rotate one glyph byte right by CL within AX and OR the resulting word into the dest
1085: MOR_ALL_2_WIDE_ROTATED macro ENTRY_LABEL,ENTRY_INDEX
1086: &ENTRY_LABEL&ENTRY_INDEX&:
1087: sub eax,eax ;clear AH so only glyph bits rotate into it
1088: mov al,[esi] ;AL = the single glyph byte for this scan
1089: inc esi ;next source scan
1090: ror ax,cl ;AL = left dest byte, AH = bits rotated out to the right
1091: or [edi],ax ;word OR: AL goes to [edi], AH to [edi+1]
1092: add edi,edx ;next dest scan (EDX = ulBufDelta)
1093: endm ;-----------------------------------;
1094:
1095: align 4
1096: or_all_2_wide_rotated_loop:
1097: UNROLL_LOOP MOR_ALL_2_WIDE_ROTATED,OA2WR,LOOP_UNROLL_COUNT
1098: dec ebx ;another pass through the unrolled body?
1099: jnz or_all_2_wide_rotated_loop
1100: jmp pGlyphLoop ;glyph done; continue via pGlyphLoop (defined outside this view)
1101:
1102: ;-----------------------------------------------------------------------;
1103: ; MOV first byte, 2 bytes wide dest, rotated, don't need final source byte (1 source byte spills into 2 dest bytes).
1104: ;-----------------------------------------------------------------------;
1105: align 4
1106: mov_first_2_wide_rotated_no_last:
1107: SET_UP_UNROLL_AND_BRANCH ebx,eax,mov_first_2_wide_rotated_table, \
1108: LOOP_UNROLL_SHIFT
1109: ;NOTE(review): the unroll macros are defined outside this view; presumably they enter the unrolled loop below based on the scan count in EBX
1110: UNROLL_LOOP_ENTRY_TABLE mov_first_2_wide_rotated_table,MF2WR, \
1111: LOOP_UNROLL_COUNT
1112: ;one scan: rotate one glyph byte right by CL within AX and store the resulting word
1113: MMOV_FIRST_2_WIDE_ROTATED macro ENTRY_LABEL,ENTRY_INDEX
1114: &ENTRY_LABEL&ENTRY_INDEX&:
1115: sub eax,eax ;clear AH so only glyph bits rotate into it
1116: mov al,[esi] ;AL = the single glyph byte for this scan
1117: inc esi ;next source scan
1118: ror ax,cl ;AL = left dest byte, AH = bits rotated out to the right
1119: mov [edi],ax ;word store: AL goes to [edi], AH to [edi+1]
1120: add edi,edx ;next dest scan (EDX = ulBufDelta)
1121: endm ;-----------------------------------;
1122:
1123: align 4
1124: mov_first_2_wide_rotated_loop:
1125: UNROLL_LOOP MMOV_FIRST_2_WIDE_ROTATED,MF2WR,LOOP_UNROLL_COUNT
1126: dec ebx ;another pass through the unrolled body?
1127: jnz mov_first_2_wide_rotated_loop
1128: jmp pGlyphLoop ;glyph done; continue via pGlyphLoop (defined outside this view)
1129:
1130: ;-----------------------------------------------------------------------;
1131: ; MOV first byte, 2 bytes wide dest, unrotated (straight word copy, one word per scan).
1132: ;-----------------------------------------------------------------------;
1133: align 4
1134: mov_first_2_wide_unrotated:
1135: SET_UP_UNROLL_AND_BRANCH ebx,eax,mov_first_2_wide_unrotated_table, \
1136: LOOP_UNROLL_SHIFT
1137: ;NOTE(review): the unroll macros are defined outside this view; presumably they enter the unrolled loop below based on the scan count in EBX
1138: UNROLL_LOOP_ENTRY_TABLE mov_first_2_wide_unrotated_table,MF2WU, \
1139: LOOP_UNROLL_COUNT
1140: ;one scan: copy a 2-byte glyph word to the dest unchanged
1141: MMOV_FIRST_2_WIDE_UNROTATED macro ENTRY_LABEL,ENTRY_INDEX
1142: &ENTRY_LABEL&ENTRY_INDEX&:
1143: mov ax,[esi] ;both glyph bytes for this scan
1144: add esi,2 ;next source scan
1145: mov [edi],ax ;overwrite both dest bytes
1146: add edi,edx ;next dest scan (EDX = ulBufDelta)
1147: endm ;-----------------------------------;
1148:
1149: align 4
1150: mov_first_2_wide_unrotated_loop:
1151: UNROLL_LOOP MMOV_FIRST_2_WIDE_UNROTATED,MF2WU,LOOP_UNROLL_COUNT
1152: dec ebx ;another pass through the unrolled body?
1153: jnz mov_first_2_wide_unrotated_loop
1154: jmp pGlyphLoop ;glyph done; continue via pGlyphLoop (defined outside this view)
1155:
1156: ;-----------------------------------------------------------------------;
1157: ; OR all bytes, 2 bytes wide dest, unrotated (one word ORed per scan).
1158: ;-----------------------------------------------------------------------;
1159: align 4
1160: or_all_2_wide_unrotated:
1161: SET_UP_UNROLL_AND_BRANCH ebx,eax,or_all_2_wide_unrotated_table, \
1162: LOOP_UNROLL_SHIFT
1163: ;NOTE(review): the unroll macros are defined outside this view; presumably they enter the unrolled loop below based on the scan count in EBX
1164: UNROLL_LOOP_ENTRY_TABLE or_all_2_wide_unrotated_table,OA2WU, \
1165: LOOP_UNROLL_COUNT
1166: ;one scan: OR a 2-byte glyph word into the dest unchanged
1167: MOR_ALL_2_WIDE_UNROTATED macro ENTRY_LABEL,ENTRY_INDEX
1168: &ENTRY_LABEL&ENTRY_INDEX&:
1169: mov ax,[esi] ;both glyph bytes for this scan
1170: add esi,2 ;next source scan
1171: or [edi],ax ;merge with what is already in both dest bytes
1172: add edi,edx ;next dest scan (EDX = ulBufDelta)
1173: endm ;-----------------------------------;
1174:
1175: align 4
1176: or_all_2_wide_unrotated_loop:
1177: UNROLL_LOOP MOR_ALL_2_WIDE_UNROTATED,OA2WU,LOOP_UNROLL_COUNT
1178: dec ebx ;another pass through the unrolled body?
1179: jnz or_all_2_wide_unrotated_loop
1180: jmp pGlyphLoop ;glyph done; continue via pGlyphLoop (defined outside this view)
1181:
1182: ;-----------------------------------------------------------------------;
1183: ; OR first byte, 3 bytes wide dest, rotated, need final source byte (3 source bytes per scan).
1184: ;-----------------------------------------------------------------------;
1185: align 4
1186: or_first_3_wide_rotated_need_last:
1187: @@: ;top of per-scan loop
1188: mov al,[esi] ;leftmost dest byte = 1st glyph byte shifted right
1189: shr al,cl
1190: or [edi],al ;OR, since this byte may already hold data
1191: mov ax,[esi] ;AL = 1st glyph byte, AH = 2nd
1192: ror ax,cl ;AH = 2nd byte >> CL with the 1st byte's low bits on top
1193: mov [edi+1],ah ;middle dest byte
1194: mov ax,[esi+1] ;AL = 2nd glyph byte, AH = 3rd
1195: add esi,3 ;next source scan
1196: ror ax,cl ;AH = 3rd byte >> CL with the 2nd byte's low bits on top
1197: mov [edi+2],ah ;rightmost dest byte
1198: add edi,edx ;next dest scan (EDX = ulBufDelta)
1199: dec ebx ;count down scans
1200: jnz @B
1201: jmp pGlyphLoop ;glyph done; continue via pGlyphLoop (defined outside this view)
1202:
1203: ;-----------------------------------------------------------------------;
1204: ; OR all bytes, 3 bytes wide dest, rotated, need final source byte (3 source bytes per scan).
1205: ;-----------------------------------------------------------------------;
1206: align 4
1207: or_all_3_wide_rotated_need_last:
1208: @@: ;top of per-scan loop
1209: mov al,[esi] ;leftmost dest byte = 1st glyph byte shifted right
1210: shr al,cl
1211: or [edi],al
1212: mov ax,[esi] ;AL = 1st glyph byte, AH = 2nd
1213: ror ax,cl ;AH = 2nd byte >> CL with the 1st byte's low bits on top
1214: or [edi+1],ah ;middle dest byte, ORed
1215: mov ax,[esi+1] ;AL = 2nd glyph byte, AH = 3rd
1216: add esi,3 ;next source scan
1217: ror ax,cl ;AH = 3rd byte >> CL with the 2nd byte's low bits on top
1218: or [edi+2],ah ;rightmost dest byte, ORed
1219: add edi,edx ;next dest scan (EDX = ulBufDelta)
1220: dec ebx ;count down scans
1221: jnz @B
1222: jmp pGlyphLoop ;glyph done; continue via pGlyphLoop (defined outside this view)
1223:
1224: ;-----------------------------------------------------------------------;
1225: ; MOV first byte, 3 bytes wide dest, rotated, need final source byte (3 source bytes per scan).
1226: ;-----------------------------------------------------------------------;
1227: align 4
1228: mov_first_3_wide_rotated_need_last:
1229: @@: ;top of per-scan loop
1230: mov al,[esi] ;leftmost dest byte = 1st glyph byte shifted right
1231: shr al,cl
1232: mov [edi],al ;stored, initializing the byte
1233: mov ax,[esi] ;AL = 1st glyph byte, AH = 2nd
1234: ror ax,cl ;AH = 2nd byte >> CL with the 1st byte's low bits on top
1235: mov [edi+1],ah ;middle dest byte
1236: mov ax,[esi+1] ;AL = 2nd glyph byte, AH = 3rd
1237: add esi,3 ;next source scan
1238: ror ax,cl ;AH = 3rd byte >> CL with the 2nd byte's low bits on top
1239: mov [edi+2],ah ;rightmost dest byte
1240: add edi,edx ;next dest scan (EDX = ulBufDelta)
1241: dec ebx ;count down scans
1242: jnz @B
1243: jmp pGlyphLoop ;glyph done; continue via pGlyphLoop (defined outside this view)
1244:
1245: ;-----------------------------------------------------------------------;
1246: ; OR first byte, 3 bytes wide dest, rotated, don't need final source byte (2 source bytes spill into 3 dest bytes).
1247: ;-----------------------------------------------------------------------;
1248: align 4
1249: or_first_3_wide_rotated_no_last:
1250: neg cl
1251: and cl,111b ;convert from right shift to left shift (8 - right shift, for a right shift of 1-7)
1252: @@: ;top of per-scan loop
1253: sub eax,eax ;clear the upper half so the shift result is clean
1254: mov ax,[esi] ;AL = 1st glyph byte, AH = 2nd
1255: add esi,2 ;next source scan
1256: xchg ah,al ;put the 1st (left) byte in the more significant position
1257: shl eax,cl ;bits 23-16 = left dest byte, AH = middle, AL = right
1258: mov [edi+1],ah ;middle dest byte
1259: mov [edi+2],al ;rightmost dest byte
1260: shr eax,16 ;AL = leftmost dest byte
1261: or [edi],al ;OR, since this byte may already hold data
1262: add edi,edx ;next dest scan (EDX = ulBufDelta)
1263: dec ebx ;count down scans
1264: jnz @B
1265: jmp pGlyphLoop ;glyph done; continue via pGlyphLoop (defined outside this view)
1266:
1267: ;-----------------------------------------------------------------------;
1268: ; OR all bytes, 3 bytes wide dest, rotated, don't need final source byte (2 source bytes spill into 3 dest bytes).
1269: ;-----------------------------------------------------------------------;
1270: align 4
1271: or_all_3_wide_rotated_no_last:
1272: neg cl
1273: and cl,111b ;convert from right shift to left shift (8 - right shift, for a right shift of 1-7)
1274: @@: ;top of per-scan loop
1275: sub eax,eax ;clear the upper half so the shift result is clean
1276: mov ax,[esi] ;AL = 1st glyph byte, AH = 2nd
1277: add esi,2 ;next source scan
1278: xchg ah,al ;put the 1st (left) byte in the more significant position
1279: shl eax,cl ;bits 23-16 = left dest byte, AH = middle, AL = right
1280: xchg ah,al ;AL = middle, AH = right, ready for a word access
1281: or [edi+1],ax ;word OR: middle byte to [edi+1], right byte to [edi+2]
1282: shr eax,16 ;AL = leftmost dest byte
1283: or [edi],al
1284: add edi,edx ;next dest scan (EDX = ulBufDelta)
1285: dec ebx ;count down scans
1286: jnz @B
1287: jmp pGlyphLoop ;glyph done; continue via pGlyphLoop (defined outside this view)
1288:
1289: ;-----------------------------------------------------------------------;
1290: ; MOV first byte, 3 bytes wide dest, rotated, don't need final source byte (2 source bytes spill into 3 dest bytes).
1291: ;-----------------------------------------------------------------------;
1292: align 4
1293: mov_first_3_wide_rotated_no_last:
1294: neg cl
1295: and cl,111b ;convert from right shift to left shift (8 - right shift, for a right shift of 1-7)
1296: @@: ;top of per-scan loop
1297: sub eax,eax ;clear the upper half so the shift result is clean
1298: mov ax,[esi] ;AL = 1st glyph byte, AH = 2nd
1299: add esi,2 ;next source scan
1300: xchg ah,al ;put the 1st (left) byte in the more significant position
1301: shl eax,cl ;bits 23-16 = left dest byte, AH = middle, AL = right
1302: mov [edi+1],ah ;middle dest byte
1303: mov [edi+2],al ;rightmost dest byte
1304: shr eax,16 ;AL = leftmost dest byte
1305: mov [edi],al ;stored, initializing the byte
1306: add edi,edx ;next dest scan (EDX = ulBufDelta)
1307: dec ebx ;count down scans
1308: jnz @B
1309: jmp pGlyphLoop ;glyph done; continue via pGlyphLoop (defined outside this view)
1310:
1311: ;-----------------------------------------------------------------------;
1312: ; MOV first byte, 3 bytes wide dest, unrotated (straight copy of 3 bytes per scan).
1313: ;-----------------------------------------------------------------------;
1314: align 4
1315: mov_first_3_wide_unrotated:
1316: @@: ;top of per-scan loop
1317: mov ax,[esi] ;first two glyph bytes
1318: mov [edi],ax
1319: mov al,[esi+2] ;third glyph byte
1320: add esi,3 ;next source scan
1321: mov [edi+2],al
1322: add edi,edx ;next dest scan (EDX = ulBufDelta)
1323: dec ebx ;count down scans
1324: jnz @B
1325: jmp pGlyphLoop ;glyph done; continue via pGlyphLoop (defined outside this view)
1326:
1327: ;-----------------------------------------------------------------------;
1328: ; OR all bytes, 3 bytes wide dest, unrotated (3 bytes ORed per scan).
1329: ;-----------------------------------------------------------------------;
1330: align 4
1331: or_all_3_wide_unrotated:
1332: @@: ;top of per-scan loop
1333: mov ax,[esi] ;first two glyph bytes
1334: or [edi],ax
1335: mov al,[esi+2] ;third glyph byte
1336: add esi,3 ;next source scan
1337: or [edi+2],al
1338: add edi,edx ;next dest scan (EDX = ulBufDelta)
1339: dec ebx ;count down scans
1340: jnz @B
1341: jmp pGlyphLoop ;glyph done; continue via pGlyphLoop (defined outside this view)
1342:
1343: ;-----------------------------------------------------------------------;
1344: ; OR first byte, 4 bytes wide dest, rotated, need final source byte (4 source bytes per scan).
1345: ;-----------------------------------------------------------------------;
1346: align 4
1347: or_first_4_wide_rotated_need_last:
1348: @@: ;top of per-scan loop
1349: mov eax,[esi] ;4 glyph bytes, leftmost in AL
1350: add esi,4 ;next source scan
1351: xchg ah,al ;these three instructions reverse the byte order of EAX,
1352: ror eax,16 ; so the leftmost glyph byte becomes most significant
1353: xchg ah,al ; and a plain right shift moves pixels rightwards
1354: shr eax,cl ;shift the whole scan right by the bit alignment
1355: xchg ah,al ;AL = dest byte 2, AH = dest byte 3
1356: mov [edi+2],ax ;store the two rightmost dest bytes
1357: shr eax,16 ;AH = dest byte 0, AL = dest byte 1
1358: mov [edi+1],al
1359: or [edi],ah ;leftmost byte is ORed, since it may already hold data
1360: add edi,edx ;next dest scan (EDX = ulBufDelta)
1361: dec ebx ;count down scans
1362: jnz @B
1363: jmp pGlyphLoop ;glyph done; continue via pGlyphLoop (defined outside this view)
1364:
1365: ;-----------------------------------------------------------------------;
1366: ; OR all bytes, 4 bytes wide dest, rotated, need final source byte (4 source bytes per scan).
1367: ;-----------------------------------------------------------------------;
1368: align 4
1369: or_all_4_wide_rotated_need_last:
1370: @@: ;top of per-scan loop
1371: mov eax,[esi] ;4 glyph bytes, leftmost in AL
1372: add esi,4 ;next source scan
1373: xchg ah,al ;these three instructions reverse the byte order of EAX,
1374: ror eax,16 ; so the leftmost glyph byte becomes most significant
1375: xchg ah,al
1376: shr eax,cl ;shift the whole scan right by the bit alignment
1377: xchg ah,al ;reverse the byte order again so the leftmost dest byte
1378: ror eax,16 ; is back in AL, matching memory order
1379: xchg al,ah
1380: or [edi],eax ;OR all 4 dest bytes at once
1381: add edi,edx ;next dest scan (EDX = ulBufDelta)
1382: dec ebx ;count down scans
1383: jnz @B
1384: jmp pGlyphLoop ;glyph done; continue via pGlyphLoop (defined outside this view)
1385:
1386: ;-----------------------------------------------------------------------;
1387: ; MOV first byte, 4 bytes wide dest, rotated, need final source byte (4 source bytes per scan).
1388: ;-----------------------------------------------------------------------;
1389: align 4
1390: mov_first_4_wide_rotated_need_last:
1391: @@: ;top of per-scan loop
1392: mov eax,[esi] ;4 glyph bytes, leftmost in AL
1393: add esi,4 ;next source scan
1394: xchg ah,al ;these three instructions reverse the byte order of EAX,
1395: ror eax,16 ; so the leftmost glyph byte becomes most significant
1396: xchg ah,al
1397: shr eax,cl ;shift the whole scan right by the bit alignment
1398: xchg ah,al ;reverse the byte order again so the leftmost dest byte
1399: ror eax,16 ; is back in AL, matching memory order
1400: xchg ah,al
1401: mov [edi],eax ;store all 4 dest bytes at once
1402: add edi,edx ;next dest scan (EDX = ulBufDelta)
1403: dec ebx ;count down scans
1404: jnz @B
1405: jmp pGlyphLoop ;glyph done; continue via pGlyphLoop (defined outside this view)
1406:
1407: ;-----------------------------------------------------------------------;
1408: ; OR first byte, 4 bytes wide dest, rotated, don't need final source byte (3 source bytes spill into 4 dest bytes).
1409: ;-----------------------------------------------------------------------;
1410: align 4
1411: or_first_4_wide_rotated_no_last:
1412: @@: ;top of per-scan loop
1413: mov ax,[esi] ;AL = 1st glyph byte, AH = 2nd
1414: xchg ah,al ;1st (left) byte into the more significant position
1415: shl eax,16 ;1st and 2nd bytes now occupy bits 31-16; AX = 0
1416: mov ah,[esi+2] ;3rd glyph byte below them; AL stays 0 to catch spill
1417: add esi,3 ;next source scan
1418: shr eax,cl ;shift the whole scan right by the bit alignment
1419: xchg ah,al ;AL = dest byte 2, AH = dest byte 3
1420: mov [edi+2],ax ;store the two rightmost dest bytes
1421: shr eax,16 ;AH = dest byte 0, AL = dest byte 1
1422: mov [edi+1],al
1423: or [edi],ah ;leftmost byte is ORed, since it may already hold data
1424: add edi,edx ;next dest scan (EDX = ulBufDelta)
1425: dec ebx ;count down scans
1426: jnz @B
1427: jmp pGlyphLoop ;glyph done; continue via pGlyphLoop (defined outside this view)
1428:
1429: ;-----------------------------------------------------------------------;
1430: ; OR all bytes, 4 bytes wide dest, rotated, don't need final source byte (3 source bytes spill into 4 dest bytes).
1431: ;-----------------------------------------------------------------------;
1432: align 4
1433: or_all_4_wide_rotated_no_last:
1434: @@: ;top of per-scan loop
1435: mov ax,[esi] ;AL = 1st glyph byte, AH = 2nd
1436: xchg ah,al ;1st (left) byte into the more significant position
1437: shl eax,16 ;1st and 2nd bytes now occupy bits 31-16; AX = 0
1438: mov ah,[esi+2] ;3rd glyph byte below them; AL stays 0 to catch spill
1439: add esi,3 ;next source scan
1440: shr eax,cl ;shift the whole scan right by the bit alignment
1441: xchg ah,al ;reverse the byte order so the leftmost dest byte
1442: ror eax,16 ; ends up in AL, matching memory order
1443: xchg ah,al
1444: or [edi],eax ;OR all 4 dest bytes at once
1445: add edi,edx ;next dest scan (EDX = ulBufDelta)
1446: dec ebx ;count down scans
1447: jnz @B
1448: jmp pGlyphLoop ;glyph done; continue via pGlyphLoop (defined outside this view)
1449:
1450: ;-----------------------------------------------------------------------;
1451: ; MOV first byte, 4 bytes wide dest, rotated, don't need final source byte (3 source bytes spill into 4 dest bytes).
1452: ;-----------------------------------------------------------------------;
1453: align 4
1454: mov_first_4_wide_rotated_no_last:
1455: @@: ;top of per-scan loop
1456: mov ax,[esi] ;AL = 1st glyph byte, AH = 2nd
1457: xchg ah,al ;1st (left) byte into the more significant position
1458: shl eax,16 ;1st and 2nd bytes now occupy bits 31-16; AX = 0
1459: mov ah,[esi+2] ;3rd glyph byte below them; AL stays 0 to catch spill
1460: add esi,3 ;next source scan
1461: shr eax,cl ;shift the whole scan right by the bit alignment
1462: xchg ah,al ;reverse the byte order so the leftmost dest byte
1463: ror eax,16 ; ends up in AL, matching memory order
1464: xchg ah,al
1465: mov [edi],eax ;store all 4 dest bytes at once
1466: add edi,edx ;next dest scan (EDX = ulBufDelta)
1467: dec ebx ;count down scans
1468: jnz @B
1469: jmp pGlyphLoop ;glyph done; continue via pGlyphLoop (defined outside this view)
1470:
1471: ;-----------------------------------------------------------------------;
1472: ; MOV first byte, 4 bytes wide dest, unrotated (straight dword copy, one dword per scan).
1473: ;-----------------------------------------------------------------------;
1474: align 4
1475: mov_first_4_wide_unrotated:
1476: @@: ;top of per-scan loop
1477: mov eax,[esi] ;all 4 glyph bytes for this scan
1478: add esi,4 ;next source scan
1479: mov [edi],eax ;overwrite all 4 dest bytes
1480: add edi,edx ;next dest scan (EDX = ulBufDelta)
1481: dec ebx ;count down scans
1482: jnz @B
1483: jmp pGlyphLoop ;glyph done; continue via pGlyphLoop (defined outside this view)
1484:
1485: ;-----------------------------------------------------------------------;
1486: ; OR all bytes, 4 bytes wide dest, unrotated (one dword ORed per scan).
1487: ;-----------------------------------------------------------------------;
1488: align 4
1489: or_all_4_wide_unrotated:
1490: @@: ;top of per-scan loop
1491: mov eax,[esi] ;all 4 glyph bytes for this scan
1492: add esi,4 ;next source scan
1493: or [edi],eax ;merge with what is already in the 4 dest bytes
1494: add edi,edx ;next dest scan (EDX = ulBufDelta)
1495: dec ebx ;count down scans
1496: jnz @B
1497: jmp pGlyphLoop ;glyph done; continue via pGlyphLoop (defined outside this view)
1498:
1499: ;-----------------------------------------------------------------------;
1500: ; OR first byte, n bytes wide dest, rotated, need final source byte (source and dest are both ulWidthInBytes wide).
1501: ;-----------------------------------------------------------------------;
1502: align 4
1503: or_first_N_wide_rotated_need_last:
1504: mov eax,ulWidthInBytes
1505: mov edx,ulBufDelta
1506: sub edx,eax ;EDI advances a full width per scan, so skip the rest of the dest scan
1507: mov ulTmpDstDelta,edx
1508: dec eax ;source doesn't advance after first byte, and
1509: ; we do the first byte outside the loop
1510: mov edx,ulGlyDelta
1511: sub edx,eax ;ESI advances width-1 per scan, so skip the rest of the source scan
1512: mov ulTmpSrcDelta,edx
1513: mov ulTmpWidthInBytes,eax ;inner loop count = width - 1
1514: ofNwrnl_scan_loop:
1515: mov al,[esi] ;do the initial, ORed byte separately
1516: shr al,cl
1517: or [edi],al
1518: inc edi
1519: mov edx,ulTmpWidthInBytes ;EDX = remaining dest bytes in this scan (CL is busy holding the shift)
1520: @@:
1521: mov ax,[esi] ;AL = previous glyph byte, AH = current
1522: inc esi
1523: ror ax,cl ;AH = current byte >> CL with the previous byte's low bits on top
1524: mov [edi],ah
1525: inc edi
1526: dec edx
1527: jnz @B
1528: add esi,ulTmpSrcDelta ;point to next source scan
1529: add edi,ulTmpDstDelta ;point to next dest scan
1530: dec ebx ;count down scans
1531: jnz ofNwrnl_scan_loop
1532: jmp pGlyphLoop ;glyph done; continue via pGlyphLoop (defined outside this view)
1533:
1534: ;-----------------------------------------------------------------------;
1535: ; OR all bytes, n bytes wide dest, rotated, need final source byte (source and dest are both ulWidthInBytes wide).
1536: ;-----------------------------------------------------------------------;
1537: align 4
1538: or_all_N_wide_rotated_need_last:
1539: mov eax,ulWidthInBytes
1540: mov edx,ulBufDelta
1541: sub edx,eax ;EDI advances a full width per scan, so skip the rest of the dest scan
1542: mov ulTmpDstDelta,edx
1543: dec eax ;source doesn't advance after first byte, and
1544: ; we do the first byte outside the loop
1545: mov edx,ulGlyDelta
1546: sub edx,eax ;ESI advances width-1 per scan, so skip the rest of the source scan
1547: mov ulTmpSrcDelta,edx
1548: mov ulTmpWidthInBytes,eax ;inner loop count = width - 1
1549: oaNwrnl_scan_loop:
1550: mov al,[esi] ;do the initial, ORed byte separately
1551: shr al,cl
1552: or [edi],al
1553: inc edi
1554: mov edx,ulTmpWidthInBytes ;EDX = remaining dest bytes in this scan (CL is busy holding the shift)
1555: @@:
1556: mov ax,[esi] ;AL = previous glyph byte, AH = current
1557: inc esi
1558: ror ax,cl ;AH = current byte >> CL with the previous byte's low bits on top
1559: or [edi],ah ;every byte is ORed in this variant
1560: inc edi
1561: dec edx
1562: jnz @B
1563: add esi,ulTmpSrcDelta ;point to next source scan
1564: add edi,ulTmpDstDelta ;point to next dest scan
1565: dec ebx ;count down scans
1566: jnz oaNwrnl_scan_loop
1567: jmp pGlyphLoop ;glyph done; continue via pGlyphLoop (defined outside this view)
1568:
1569: ;-----------------------------------------------------------------------;
1570: ; MOV first byte, n bytes wide dest, rotated, need final source byte (source and dest are both ulWidthInBytes wide).
1571: ;-----------------------------------------------------------------------;
1572: align 4
1573: mov_first_N_wide_rotated_need_last:
1574: mov eax,ulWidthInBytes
1575: mov edx,ulBufDelta
1576: sub edx,eax ;EDI advances a full width per scan, so skip the rest of the dest scan
1577: mov ulTmpDstDelta,edx
1578: mov eax,ulWidthInBytes ;redundant reload: EAX still holds ulWidthInBytes here
1579: dec eax ;source doesn't advance after first byte, and
1580: ; we do the first byte outside the loop
1581: mov edx,ulGlyDelta
1582: sub edx,eax ;ESI advances width-1 per scan, so skip the rest of the source scan
1583: mov ulTmpSrcDelta,edx
1584: mov ulTmpWidthInBytes,eax ;inner loop count = width - 1
1585: mfNwrnl_scan_loop:
1586: mov al,[esi] ;do the initial byte separately
1587: shr al,cl
1588: mov [edi],al ;stored, initializing the byte
1589: inc edi
1590: mov edx,ulTmpWidthInBytes ;EDX = remaining dest bytes in this scan (CL is busy holding the shift)
1591: @@:
1592: mov ax,[esi] ;AL = previous glyph byte, AH = current
1593: inc esi
1594: ror ax,cl ;AH = current byte >> CL with the previous byte's low bits on top
1595: mov [edi],ah
1596: inc edi
1597: dec edx
1598: jnz @B
1599: add esi,ulTmpSrcDelta ;point to next source scan
1600: add edi,ulTmpDstDelta ;point to next dest scan
1601: dec ebx ;count down scans
1602: jnz mfNwrnl_scan_loop
1603: jmp pGlyphLoop ;glyph done; continue via pGlyphLoop (defined outside this view)
1604:
1605: ;-----------------------------------------------------------------------;
1606: ; OR first byte, N bytes wide dest, rotated, don't need final source byte (source is one byte narrower than dest).
1607: ;-----------------------------------------------------------------------;
1608: align 4
1609: or_first_N_wide_rotated_no_last:
1610: mov eax,ulWidthInBytes
1611: dec eax ;one less because we don't advance after the
1612: ; last byte
1613: mov edx,ulBufDelta
1614: sub edx,eax ;dest skip = ulBufDelta - (width - 1)
1615: mov ulTmpDstDelta,edx
1616: dec eax ;source doesn't advance after first byte, and
1617: ; we do the first & last bytes outside the
1618: ; loop; already subtracted 1 above
1619: mov edx,ulGlyDelta
1620: sub edx,eax ;source skip = ulGlyDelta - (width - 2)
1621: mov ulTmpSrcDelta,edx
1622: mov ulTmpWidthInBytes,eax ;inner loop count = width - 2
1623: ofNwr_scan_loop:
1624: mov al,[esi] ;do the initial, ORed byte separately
1625: shr al,cl
1626: or [edi],al
1627: inc edi
1628: mov edx,ulTmpWidthInBytes ;EDX = middle dest bytes in this scan (CL is busy holding the shift)
1629: @@:
1630: mov ax,[esi] ;AL = previous glyph byte, AH = current
1631: inc esi
1632: ror ax,cl ;AH = current byte >> CL with the previous byte's low bits on top
1633: mov [edi],ah
1634: inc edi
1635: dec edx
1636: jnz @B
1637:
1638: mov ah,[esi] ;do the final byte separately
1639: sub al,al ;AL = 0 so only the last glyph byte's low bits land in it
1640: shr eax,cl ;AL = bits shifted out of the last source byte
1641: mov [edi],al ;last dest byte; EDI is not advanced past it
1642:
1643: add esi,ulTmpSrcDelta ;point to next source scan
1644: add edi,ulTmpDstDelta ;point to next dest scan
1645: dec ebx ;count down scans
1646: jnz ofNwr_scan_loop
1647: jmp pGlyphLoop ;glyph done; continue via pGlyphLoop (defined outside this view)
1648:
1649: ;-----------------------------------------------------------------------;
1650: ; OR all bytes, N bytes wide dest, rotated, don't need final source byte (source is one byte narrower than dest).
1651: ;-----------------------------------------------------------------------;
1652: align 4
1653: or_all_N_wide_rotated_no_last:
1654: mov eax,ulWidthInBytes
1655: dec eax ;one less because we don't advance after the
1656: ; last byte
1657: mov edx,ulBufDelta
1658: sub edx,eax ;dest skip = ulBufDelta - (width - 1)
1659: mov ulTmpDstDelta,edx
1660: dec eax ;source doesn't advance after first byte, and
1661: ; we do the first & last bytes outside the
1662: ; loop; already subtracted 1 above
1663: mov edx,ulGlyDelta
1664: sub edx,eax ;source skip = ulGlyDelta - (width - 2)
1665: mov ulTmpSrcDelta,edx
1666: mov ulTmpWidthInBytes,eax ;inner loop count = width - 2
1667: oaNwr_scan_loop:
1668: mov al,[esi] ;do the initial, ORed byte separately
1669: shr al,cl
1670: or [edi],al
1671: inc edi
1672: mov edx,ulTmpWidthInBytes ;EDX = middle dest bytes in this scan (CL is busy holding the shift)
1673: @@:
1674: mov ax,[esi] ;AL = previous glyph byte, AH = current
1675: inc esi
1676: ror ax,cl ;AH = current byte >> CL with the previous byte's low bits on top
1677: or [edi],ah ;every byte is ORed in this variant
1678: inc edi
1679: dec edx
1680: jnz @B
1681:
1682: mov ah,[esi] ;do the final byte separately
1683: sub al,al ;AL = 0 so only the last glyph byte's low bits land in it
1684: shr eax,cl ;AL = bits shifted out of the last source byte
1685: or [edi],al ;last dest byte; EDI is not advanced past it
1686:
1687: add esi,ulTmpSrcDelta ;point to next source scan
1688: add edi,ulTmpDstDelta ;point to next dest scan
1689: dec ebx ;count down scans
1690: jnz oaNwr_scan_loop
1691: jmp pGlyphLoop ;glyph done; continue via pGlyphLoop (defined outside this view)
1692:
1693: ;-----------------------------------------------------------------------;
1694: ; MOV first byte, N bytes wide dest, rotated, don't need final source byte (source is one byte narrower than dest).
1695: ;-----------------------------------------------------------------------;
1696: align 4
1697: mov_first_N_wide_rotated_no_last:
1698: mov eax,ulWidthInBytes
1699: dec eax ;one less because we don't advance after the
1700: ; last byte
1701: mov edx,ulBufDelta
1702: sub edx,eax ;dest skip = ulBufDelta - (width - 1)
1703: mov ulTmpDstDelta,edx
1704: dec eax ;source doesn't advance after first byte, and
1705: ; we do the first & last bytes outside the
1706: ; loop; already subtracted 1 above
1707: mov edx,ulGlyDelta
1708: sub edx,eax ;source skip = ulGlyDelta - (width - 2)
1709: mov ulTmpSrcDelta,edx
1710: mov ulTmpWidthInBytes,eax ;inner loop count = width - 2
1711: mfNwr_scan_loop:
1712: mov al,[esi] ;do the initial byte separately
1713: shr al,cl
1714: mov [edi],al ;stored, initializing the byte
1715: inc edi
1716: mov edx,ulTmpWidthInBytes ;EDX = middle dest bytes in this scan (CL is busy holding the shift)
1717: @@:
1718: mov ax,[esi] ;AL = previous glyph byte, AH = current
1719: inc esi
1720: ror ax,cl ;AH = current byte >> CL with the previous byte's low bits on top
1721: mov [edi],ah
1722: inc edi
1723: dec edx
1724: jnz @B
1725:
1726: mov ah,[esi] ;do the final byte separately
1727: sub al,al ;AL = 0 so only the last glyph byte's low bits land in it
1728: shr eax,cl ;AL = bits shifted out of the last source byte
1729: mov [edi],al ;last dest byte; EDI is not advanced past it
1730:
1731: add esi,ulTmpSrcDelta ;point to next source scan
1732: add edi,ulTmpDstDelta ;point to next dest scan
1733: dec ebx ;count down scans
1734: jnz mfNwr_scan_loop
1735: jmp pGlyphLoop ;glyph done; continue via pGlyphLoop (defined outside this view)
1736:
1737: ;-----------------------------------------------------------------------;
1738: ; MOV first byte, N bytes wide dest, unrotated. Copies ulWidthInBytes per scan as dwords plus 0-3 trailing bytes; source scans are read contiguously.
1739: ;-----------------------------------------------------------------------;
1740: align 4
1741: mov_first_N_wide_unrotated:
1742: mov edx,ulBufDelta
1743: mov eax,ulWidthInBytes
1744: sub edx,eax ;EDX = dest bytes to skip after a full-width copy
1745: shr eax,1 ;width in words
1746: jc short odd_width ;there's at least one odd byte
1747: shr eax,1 ;width in dwords
1748: jc short two_odd_bytes ;there's an odd word
1749: ;copy width is a dword multiple
1750: @@:
1751: mov ecx,eax ;ECX = dword count for REP (REP leaves ECX = 0)
1752: rep movsd ;copy as many dwords as possible
1753: add edi,edx ;point to next dest scan
1754: dec ebx ;count down scans
1755: jnz @B
1756: jmp pGlyphLoop ;glyph done; continue via pGlyphLoop (defined outside this view)
1757:
1758: align 4
1759: odd_width:
1760: shr eax,1 ;width in dwords
1761: jc short three_odd_bytes ;there's an odd word and an odd byte
1762: ;there's just an odd byte
1763: inc edx ;because we won't advance after last byte
1764: @@:
1765: mov ecx,eax ;ECX = dword count for REP
1766: rep movsd ;copy as many dwords as possible
1767: mov cl,[esi] ;copy the trailing byte by hand
1768: inc esi
1769: mov [edi],cl
1770: add edi,edx ;point to next dest scan
1771: dec ebx ;count down scans
1772: jnz @B
1773: jmp pGlyphLoop ;glyph done; continue via pGlyphLoop (defined outside this view)
1774:
1775: align 4
1776: two_odd_bytes:
1777: add edx,2 ;because we won't advance after last word
1778: @@:
1779: mov ecx,eax ;ECX = dword count for REP
1780: rep movsd ;copy as many dwords as possible
1781: mov cx,[esi] ;copy the trailing word by hand
1782: add esi,2
1783: mov [edi],cx
1784: add edi,edx ;point to next dest scan
1785: dec ebx ;count down scans
1786: jnz @B
1787: jmp pGlyphLoop ;glyph done; continue via pGlyphLoop (defined outside this view)
1788:
1789: align 4
1790: three_odd_bytes:
1791: add edx,3 ;because we won't advance after last word/byte
1792: @@:
1793: mov ecx,eax ;ECX = dword count for REP
1794: rep movsd ;copy as many dwords as possible
1795: mov cx,[esi] ;copy the trailing word...
1796: mov [edi],cx
1797: mov cl,[esi+2] ;...and the trailing byte by hand
1798: add esi,3
1799: mov [edi+2],cl
1800: add edi,edx ;point to next dest scan
1801: dec ebx ;count down scans
1802: jnz @B
1803: jmp pGlyphLoop ;glyph done; continue via pGlyphLoop (defined outside this view)
1804:
1805: ;-----------------------------------------------------------------------;
1806: ; OR all bytes, N bytes wide dest, unrotated.
1807: ;-----------------------------------------------------------------------;
1808: align 4
1809: or_all_N_wide_unrotated:
1810: mov edx,ulBufDelta
1811: mov eax,ulWidthInBytes
1812: sub edx,eax
1813: shr eax,1 ;width in words
1814: jc short or_odd_width ;there's at least one odd byte
1815: shr eax,1 ;width in dwords
1816: jc short or_two_odd_bytes ;there's an odd word
1817: ;copy width is a dword multiple
1818: or_no_odd_bytes_loop:
1819: push ebx ;preserve scan count
1820: mov ebx,eax
1821: @@:
1822: mov ecx,[esi]
1823: add esi,4
1824: or [edi],ecx
1825: add edi,4 ;copy as many dwords as possible
1826: dec ebx
1827: jnz @B
1828: add edi,edx
1829: pop ebx ;restore scan count
1830: dec ebx
1831: jnz or_no_odd_bytes_loop
1832: jmp pGlyphLoop
1833:
1834: align 4
1835: or_odd_width: ;OR path for widths with an odd byte; EAX = width in words, EDX = ulBufDelta - width
1836: shr eax,1 ;width in dwords
1837: jc short or_three_odd_bytes ;there's an odd word and an odd byte; stay on the OR path (three_odd_bytes would copy over the dest instead of ORing)
1838: ;there's just an odd byte
1839: inc edx ;skip over last byte too
1840: or_one_odd_bytes_loop:
1841: push ebx ;preserve scan count
1842: mov ebx,eax ;inner dword counter
1843: @@:
1844: mov ecx,[esi] ;next source dword
1845: add esi,4
1846: or [edi],ecx ;merge into dest without disturbing bits already set
1847: add edi,4 ;OR in as many dwords as possible
1848: dec ebx
1849: jnz @B
1850: mov cl,[esi] ;fetch the trailing odd byte
1851: or [edi],cl ;OR it in; EDI is not advanced (EDX accounts for it)
1852: inc esi ;source advances past the odd byte
1853: add edi,edx ;point to next dest scan
1854: pop ebx ;restore scan count
1855: dec ebx ;count down scans
1856: jnz or_one_odd_bytes_loop
1857: jmp pGlyphLoop ;all scans done; rejoin pGlyphLoop
1858:
1859: align 4
1860: or_two_odd_bytes: ;OR path for widths of N dwords plus one odd word
1861: add edx,2 ;skip over last 2 bytes too
1862: or_two_odd_bytes_loop:
1863: push ebx ;preserve scan count
1864: mov ebx,eax ;inner dword counter
1865: @@:
1866: mov ecx,[esi] ;next source dword
1867: add esi,4
1868: or [edi],ecx ;merge into dest without disturbing bits already set
1869: add edi,4 ;OR in as many dwords as possible
1870: dec ebx
1871: jnz @B
1872: mov cx,[esi] ;fetch the trailing odd word
1873: or [edi],cx ;OR it in; EDI is not advanced (EDX accounts for it)
1874: add esi,2 ;source advances past the odd word
1875: add edi,edx ;point to next dest scan
1876: pop ebx ;restore scan count
1877: dec ebx ;count down scans
1878: jnz or_two_odd_bytes_loop
1879: jmp pGlyphLoop ;all scans done; rejoin pGlyphLoop
1880:
1881: align 4
1882: or_three_odd_bytes: ;OR path for widths of N dwords plus an odd word and an odd byte
1883: add edx,3 ;skip over last 3 bytes too
1884: or_three_odd_bytes_loop:
1885: push ebx ;preserve scan count
1886: mov ebx,eax ;inner dword counter
1887: @@:
1888: mov ecx,[esi] ;next source dword
1889: add esi,4
1890: or [edi],ecx ;merge into dest without disturbing bits already set
1891: add edi,4 ;OR in as many dwords as possible
1892: dec ebx
1893: jnz @B
1894: mov cx,[esi] ;fetch the odd word
1895: or [edi],cx ;OR it in; EDI is not advanced
1896: mov cl,[esi+2] ;fetch the odd byte that follows the word
1897: or [edi+2],cl ;OR it in
1898: add esi,3 ;source advances past the word and the byte
1899: add edi,edx ;point to next dest scan
1900: pop ebx ;restore scan count
1901: dec ebx ;count down scans
1902: jnz or_three_odd_bytes_loop
1903: jmp pGlyphLoop ;all scans done; rejoin pGlyphLoop
1904:
1905: ;-----------------------------------------------------------------------;
1906: ; At this point, the text is drawn to the temp buffer.
1907: ; Now, color-expand the temp buffer to the screen.
1908: ;
1909: ; Input:
1910: ; ppdev = pointer to target surface's PDEV (screen)
1911: ; prclText = pointer to text bounding rectangle
1912: ; prclOpaque = pointer to opaquing rectangle, if there is one (0 if not)
1913: ; iFgColor = text color
1914: ; iBgColor = opaquing rectangle color, if there is one
1915: ; ulTempLeft = X coordinate on dest of left edge of temp buffer pointed
1916: ; to by pTempBuffer
1917: ; pTempBuffer = pointer to first byte (upper left corner) of
1918: ; temp buffer into which we're drawing. This should be
1919: ; word-aligned with the destination
1920: ; ulBufDelta = temp buffer scan-to-scan offset
1921: ; Text drawn to temp buffer
1922: ;
1923: ;-----------------------------------------------------------------------;
1924: align 4
1925: draw_to_screen:
1926:
1927: ;-----------------------------------------------------------------------;
1928: ; Is this transparent or opaque text?
1929: ;-----------------------------------------------------------------------;
1930:
1931: cmp prclOpaque,0 ;a zero opaquing-rect pointer selects transparent text
1932: jnz opaque_text
1933:
1934: ;-----------------------------------------------------------------------;
1935: ; Transparent text.
1936: ;-----------------------------------------------------------------------;
1937:
1938: ;-----------------------------------------------------------------------;
1939: ; Calculate drawing parameters.
1940: ;-----------------------------------------------------------------------;
1941:
1942: mov esi,prclText
1943: mov ebx,ppdev
1944: mov eax,[esi].xRight
1945: mov edx,[esi].xLeft
1946: and edx,not 7 ;left edge rounded down to a multiple of 8 pixels
1947: add eax,7 ;bias so the shift below rounds up
1948: sub eax,edx
1949: shr eax,3 ;width of text in temp buffer in bytes, rounded
1950: mov ulXparBytes,eax ; up. Also number of quadpixel pairs to draw
1951:
1952: mov ecx,[ebx].pdev_lNextScan
1953: mov ulScreenDelta,ecx
1954: shl eax,3 ;each temp buffer byte maps to eight VGA
1955: ; addresses (two quadpixels in linear mode)
1956: sub ecx,eax ;offset to next scan in screen
1957: mov ulTmpDstDelta,ecx
1958:
1959: ;-----------------------------------------------------------------------;
1960: ; Calculate the offset of the initial destination quadpixel.
1961: ;-----------------------------------------------------------------------;
1962:
1963: mov eax,[esi].yTop
1964: mul ulScreenDelta ;EAX = yTop * screen scan width (EDX is overwritten)
1965: mov edi,ulTempLeft
1966: add edi,eax ;offset in bitmap of first quadpixel's byte
1967: ; (remember, this is linear mode)
1968: ;-----------------------------------------------------------------------;
1969: ; Map in the bank containing the top scan of the text, if it's not
1970: ; mapped in already.
1971: ;-----------------------------------------------------------------------;
1972:
1973: mov eax,[esi].yTop ;top scan line of text
1974: mov ulTopScan,eax
1975: mov esi,pTempBuffer ;initial source address
1976: cmp eax,[ebx].pdev_rcl1WindowClip.yTop ;is text top less than
1977: ; current bank?
1978: jl short xpar_map_init_bank ;yes, map in proper bank
1979: cmp eax,[ebx].pdev_rcl1WindowClip.yBottom ;text top greater than
1980: ; current bank?
1981: jl short xpar_init_bank_mapped ;no, proper bank already mapped
1982: xpar_map_init_bank:
1983:
1984: ; Map in the bank containing the top scan line of the fill.
1985: ; Preserves EBX, ESI, and EDI.
1986:
1987: ptrCall <dword ptr [ebx].pdev_pfnBankControl>,<ebx,eax,JustifyTop>
1988:
1989: xpar_init_bank_mapped:
1990:
1991: add edi,[ebx].pdev_pvBitmapStart ;initial destination address
1992:
1993: ;-----------------------------------------------------------------------;
1994: ; Main loop for processing fill in each bank.
1995: ;
1996: ; At start of loop, EBX->ppdev, ESI->next temp buffer byte, EDI->next dest byte
1997: ;-----------------------------------------------------------------------;
1998:
1999: xpar_bank_loop:
2000: mov edx,ulBottomScan ;bottom of destination rectangle
2001: cmp edx,[ebx].pdev_rcl1WindowClip.yBottom
2002: ;which comes first, the bottom of the
2003: ; text rect or the bottom of the
2004: ; current bank?
2005: jl short @F ;text bottom comes first, so draw to
2006: ; that; this is the last bank in text
2007: mov edx,[ebx].pdev_rcl1WindowClip.yBottom
2008: ;bank bottom comes first; draw to
2009: ; bottom of bank
2010: @@:
2011: sub edx,ulTopScan ;# of scans to draw in bank
2012:
2013: mov al,byte ptr iFgColor
2014: mov ah,al
2015: mov ebx,eax ;EBX is scratch from here; it is reloaded from ppdev after the scans
2016: shl eax,16
2017: mov ax,bx ;put drawing color in all bytes of EAX
2018:
2019: sub ebx,ebx ;prepare for look-up in loop
2020: xpar_scan_loop:
2021:
2022: mov ecx,ulXparBytes ;number of quadpixel pairs to draw
2023:
2024: mov bl,[esi] ;get next glyph byte
2025: and bl,bl ;are all 8 pixels transparent?
2026: jz xpar_low_nibble_0 ;yes, just skip everything in this byte
2027: shr bl,4 ;shift the high nibble into the low
2028: ; nibble
2029: jmp xpar_high_nibble_table[ebx*4] ;branch to draw up to four
2030: ; pixels, followed by a branch to
2031: ; draw the other nibble (up
2032: ; to four more pixels)
2033: align 4
2034: xpar_scan_done: ;the nibble handlers jump here when ECX reaches 0
2035:
2036: add edi,ulTmpDstDelta ;point to next screen scan
2037:
2038: dec edx ;count down scans
2039: jnz xpar_scan_loop
2040:
2041: ;-----------------------------------------------------------------------;
2042: ; See if there are more banks to draw.
2043: ;-----------------------------------------------------------------------;
2044:
2045: mov ebx,ppdev
2046: mov eax,[ebx].pdev_rcl1WindowClip.yBottom ;is the text bottom in
2047: cmp ulBottomScan,eax ; the current bank?
2048: jnle short do_next_xpar_bank ;no, map in the next bank and draw
2049:
2050: cRet vFastText ;yes, so we're done
2051:
2052: align 4
2053: do_next_xpar_bank: ;on entry EAX = yBottom of the bank just drawn, EBX->ppdev
2054: mov ulTopScan,eax ;the next bank starts where this one ended
2055: sub edi,[ebx].pdev_pvBitmapStart ;convert from address to offset
2056: ; within bitmap
2057: ptrCall <dword ptr [ebx].pdev_pfnBankControl>,<ebx,eax,JustifyTop>
2058: ;map in the bank (call preserves
2059: ; EBX, ESI, and EDI)
2060: add edi,[ebx].pdev_pvBitmapStart ;convert from offset within bitmap
2061: ; to address (bitmap start just
2062: ; moved)
2063: jmp xpar_bank_loop ;we're ready to draw to the new
2064: ; bank
2065:
2066: ;-----------------------------------------------------------------------
2067: ; Routines to draw 0-4 pixels with the color in each byte of EAX, depending
2068: ; on the value of the nibble describing the four pixels to draw. "high_nibble"
2069: ; routines draw based on the upper nibble of the byte pointed to by ESI;
2070: ; "low_nibble" routines draw based on the lower nibble of that byte.
2071: ;
2072: ; EAX = color with which to draw, repeated four times
2073: ; EBX = zero (0) in bits 8-31; BL is scratch for the jump table index
2074: ; ECX = the number of nibble pairs (source bytes, 8 pixels each) to draw
2075: ; EDX = not used (preserved)
2076: ; ESI = pointer to first nibble pair to draw
2077: ; EDI = pointer to first destination byte to which to draw
2078: ;
2079: ; Must always be entered on the high nibble and extended for an even number of
2080: ; nibbles.
2081:
2082: ;-----------------------------------------------------------------------
2083: ; Macro to advance to the next glyph byte and destination 8-pixel set, then
2084: ; dispatch on the high nibble of the new byte at [ESI] (or exit at end of scan).
2085:
2086: DO_HIGH_NIBBLE macro
2087: inc esi ;point to the next glyph byte
2088: add edi,8 ;point to the next destination 8-pixel set
2089: dec ecx ;count down nibble pairs (8-pixel sets)
2090: jz xpar_scan_done ;done with this scan
2091: mov bl,[esi] ;not done; get next glyph byte
2092: and bl,bl ;are all 8 pixels transparent?
2093: jz xpar_low_nibble_0 ;yes, just skip everything in this byte
2094: shr bl,4 ;shift the high nibble into the low nibble
2095: jmp xpar_high_nibble_table[ebx*4] ;branch to draw up to four pixels
2096: endm
2097:
2098: ;-----------------------------------------------------------------------
2099: ; Macro to dispatch to the routine that draws the four pixels represented by
2100: ; the low nibble of the byte at [ESI]. ESI, EDI and ECX are left unchanged.
2101:
2102: DO_LOW_NIBBLE macro
2103: mov bl,[esi] ;get glyph byte again, for the low nibble this
2104: ; time
2105: and ebx,0fh ;isolate the low nibble
2106: jmp xpar_low_nibble_table[ebx*4] ;branch to draw up to four pixels
2107: endm
2108:
2109: ;----------------------------------------------------------------------- high-nibble handlers: nibble bit 3 maps to [EDI], bit 0 to [EDI+3]
2110:
2111: align 4
2112: xpar_high_nibble_F: ;1111b: all four pixels
2113: mov [edi],eax
2114: xpar_high_nibble_0: ;0000b: nothing to draw
2115: DO_LOW_NIBBLE
2116:
2117: align 4
2118: xpar_high_nibble_E: ;1110b: pixels 0, 1, 2
2119: mov [edi],ax
2120: mov [edi+2],al
2121: DO_LOW_NIBBLE
2122:
2123: align 4
2124: xpar_high_nibble_D: ;1101b: pixels 0, 1, 3
2125: mov [edi],ax
2126: mov [edi+3],al
2127: DO_LOW_NIBBLE
2128:
2129: align 4
2130: xpar_high_nibble_C: ;1100b: pixels 0, 1
2131: mov [edi],ax
2132: DO_LOW_NIBBLE
2133:
2134: align 4
2135: xpar_high_nibble_B: ;1011b: pixels 0, 2, 3
2136: mov [edi],al
2137: mov [edi+2],ax
2138: DO_LOW_NIBBLE
2139:
2140: align 4
2141: xpar_high_nibble_8: ;1000b: pixel 0
2142: mov [edi],al
2143: DO_LOW_NIBBLE
2144:
2145: align 4
2146: xpar_high_nibble_6: ;0110b: pixels 1, 2
2147: mov [edi+1],ax
2148: DO_LOW_NIBBLE
2149:
2150: align 4
2151: xpar_high_nibble_5: ;0101b: pixels 1, 3
2152: mov [edi+1],al
2153: mov [edi+3],al
2154: DO_LOW_NIBBLE
2155:
2156: align 4
2157: xpar_high_nibble_4: ;0100b: pixel 1
2158: mov [edi+1],al
2159: DO_LOW_NIBBLE
2160:
2161: align 4
2162: xpar_high_nibble_7: ;0111b: pixel 1, then falls through for pixels 2, 3
2163: mov [edi+1],al
2164: xpar_high_nibble_3: ;0011b: pixels 2, 3
2165: mov [edi+2],ax
2166: DO_LOW_NIBBLE
2167:
2168: align 4
2169: xpar_high_nibble_A: ;1010b: pixel 0, then falls through for pixel 2
2170: mov [edi],al
2171: xpar_high_nibble_2: ;0010b: pixel 2
2172: mov [edi+2],al
2173: DO_LOW_NIBBLE
2174:
2175: align 4
2176: xpar_high_nibble_9: ;1001b: pixel 0, then falls through for pixel 3
2177: mov [edi],al
2178: xpar_high_nibble_1: ;0001b: pixel 3
2179: mov [edi+3],al
2180: DO_LOW_NIBBLE
2181:
2182:
2183: align 4
2184: xpar_low_nibble_0: ;0000b: nothing to draw (also the target for an all-zero glyph byte)
2185: DO_HIGH_NIBBLE
2186:
2187: align 4
2188: xpar_low_nibble_F: ;1111b: pixels 4-7
2189: mov [edi+4],eax
2190: DO_HIGH_NIBBLE
2191:
2192: align 4
2193: xpar_low_nibble_E: ;1110b: pixels 4, 5, 6
2194: mov [edi+4],ax
2195: mov [edi+6],al
2196: DO_HIGH_NIBBLE
2197:
2198: align 4
2199: xpar_low_nibble_D: ;1101b: pixels 4, 5, 7
2200: mov [edi+4],ax
2201: mov [edi+7],al
2202: DO_HIGH_NIBBLE
2203:
2204: align 4
2205: xpar_low_nibble_C: ;1100b: pixels 4, 5
2206: mov [edi+4],ax
2207: DO_HIGH_NIBBLE
2208:
2209: align 4
2210: xpar_low_nibble_B: ;1011b: pixels 4, 6, 7
2211: mov [edi+4],al
2212: mov [edi+6],ax
2213: DO_HIGH_NIBBLE
2214:
2215: align 4
2216: xpar_low_nibble_8: ;1000b: pixel 4
2217: mov [edi+4],al
2218: DO_HIGH_NIBBLE
2219:
2220: align 4
2221: xpar_low_nibble_6: ;0110b: pixels 5, 6
2222: mov [edi+5],ax
2223: DO_HIGH_NIBBLE
2224:
2225: align 4
2226: xpar_low_nibble_5: ;0101b: pixels 5, 7
2227: mov [edi+5],al
2228: mov [edi+7],al
2229: DO_HIGH_NIBBLE
2230:
2231: align 4
2232: xpar_low_nibble_4: ;0100b: pixel 5
2233: mov [edi+5],al
2234: DO_HIGH_NIBBLE
2235:
2236: align 4
2237: xpar_low_nibble_7: ;0111b: pixel 5, then falls through for pixels 6, 7
2238: mov [edi+5],al
2239: xpar_low_nibble_3: ;0011b: pixels 6, 7
2240: mov [edi+6],ax
2241: DO_HIGH_NIBBLE
2242:
2243: align 4
2244: xpar_low_nibble_A: ;1010b: pixel 4, then falls through for pixel 6
2245: mov [edi+4],al
2246: xpar_low_nibble_2: ;0010b: pixel 6
2247: mov [edi+6],al
2248: DO_HIGH_NIBBLE
2249:
2250: align 4
2251: xpar_low_nibble_9: ;1001b: pixel 4, then falls through for pixel 7
2252: mov [edi+4],al
2253: xpar_low_nibble_1: ;0001b: pixel 7
2254: mov [edi+7],al
2255: DO_HIGH_NIBBLE
2256:
2257:
2258: ;-----------------------------------------------------------------------;
2259: ; Opaque text.
2260: ;-----------------------------------------------------------------------;
2261:
2262: align 4
2263: opaque_text:
2264:
2265: ;-----------------------------------------------------------------------;
2266: ; Calculate drawing parameters.
2267: ;-----------------------------------------------------------------------;
2268:
2269: mov ebx,ppdev
2270: mov esi,prclText ;point to bounding rectangle for text
2271:
2272: mov eax,[ebx].pdev_lPlanarNextScan ;set the screen width in
2273: mov ulScreenDelta,eax ; quadpixels
2274:
2275: sub eax,eax ;assume clipped edge bytes won't need
2276: mov ulLeftEdgeShift,eax ; to be shifted into position
2277: mov ulRightEdgeShift,eax
2278:
2279: mov eax,[esi].xRight
2280: mov ebx,eax ;EBX is reused as a table index; ppdev is reloaded later
2281: and ebx,11b ;dest right edge % 4
2282: mov edx,[esi].xLeft
2283: mov ulTextLeft,edx ;remember dest left edge
2284: mov cl,jOpaqueRightMasks[ebx] ;set right edge clip mask
2285: mov ebx,edx
2286: and ebx,11b ;dest left edge % 4
2287: mov ulRightMask,ecx ;only CL was loaded; bits 8-31 of the stored value are not set here
2288: mov cl,jOpaqueLeftMasks[ebx] ;set left edge clip mask
2289: mov ulLeftMask,ecx ;likewise, only the low byte comes from the table
2290:
2291: and edx,not 7 ;left edge, rounded down to nearest byte
2292: dec eax ;right edge - 1
2293: sub eax,edx
2294: shr eax,3 ;width of the text in the temp buffer in bytes,
2295: ; rounded up, minus 1. This is used to point to
2296: ; the partial right edge, if there is one
2297: mov ulTextWidthInBytesMinus1,eax
2298:
2299: ;-----------------------------------------------------------------------;
2300: ; Figure out what edges we need to handle, and calculate some info for
2301: ; doing whole bytes. Leaves ECX = edge-drawing vector, EAX = # of whole quadpixels.
2302: ;-----------------------------------------------------------------------;
2303:
2304: mov edx,[esi].xLeft
2305: mov eax,[esi].xRight
2306: and edx,not 3 ;left edge rounded down to a quadpixel boundary
2307: add eax,3 ;bias so the shift below rounds up
2308: sub eax,edx
2309: shr eax,2 ;width of the text in the temp buffer in
2310: ; quadpixels, rounded up (counting all whole
2311: ; and partial quadpixels)
2312: cmp eax,1 ;only one quadpixel total?
2313: jnz short @F ;no
2314: ;yes, special case a single quadpixel
2315: mov ecx,offset opaq_check_more_banks ;assume it's a solid
2316: ; quadpixel
2317: mov ebx,ulLeftMask
2318: and ebx,ulRightMask ;combine both clip masks for the lone quadpixel
2319: cmp bl,0ffh ;solid quadpixel?
2320: jz short opaq_set_deltas_and_edge_vector ;yes, all set
2321: mov ulLeftMask,ebx ;no, draw as a left edge
2322: dec eax ;there are no whole quadpixels
2323: mov ecx,offset opaq_draw_left_edge_only
2324: test [esi].xLeft,100b ;is partial quadpixel in bits 0-3?
2325: jnz opaq_set_edge_vector ;yes, no shift required, already set
2326: mov ulLeftEdgeShift,4 ;no, must shift right 4 to get into
2327: ; bits 0-3
2328: jmp short opaq_set_edge_vector ;all set (no whole-quadpixel deltas needed)
2329:
2330: align 4
2331: @@:
2332: lea edx,[eax-1]
2333: mov ulVGAWidthInBytesMinus1,edx ;offset from leftmost VGA dest byte
2334: ; to rightmost
2335:
2336: test [esi].xLeft,11b ;is left edge a solid quadpixel?
2337: jz short opaq_left_edge_solid ;yes
2338: dec eax ;one less whole quadpixel
2339: mov ecx,offset opaq_draw_left_edge_only ;assume right edge is solid
2340: test [esi].xLeft,100b ;is partial quadpixel in bits 0-3?
2341: jnz short @F ;yes, no shift required, already set
2342: mov ulLeftEdgeShift,4 ;no, must shift right 4 to get into
2343: ; bits 0-3
2344: @@:
2345: test [esi].xRight,11b ;is right edge a solid quadpixel?
2346: jz short opaq_set_deltas_and_edge_vector ;yes, all set
2347: dec eax ;one less whole quadpixel
2348: mov ecx,offset opaq_draw_both_edges ;both edges are non-solid
2349: jmp short opaq_set_right_edge_shift
2350:
2351: align 4
2352: opaq_left_edge_solid:
2353: mov ecx,offset opaq_check_more_banks ;assume right edge is solid
2354: test [esi].xRight,11b ;is right edge a solid quadpixel?
2355: jz short opaq_set_deltas_and_edge_vector ;yes, all set
2356: dec eax ;one less whole quadpixel
2357: mov ecx,offset opaq_draw_right_edge_only ;no, do non-solid right
2358: ; edge
2359: opaq_set_right_edge_shift:
2360: test [esi].xRight,100b ;is partial quadpixel in bits 0-3?
2361: jnz short opaq_set_deltas_and_edge_vector
2362: ;yes, no shift required, already set
2363: mov ulRightEdgeShift,4 ;no, must shift right 4 to get into
2364: ; bits 0-3
2365:
2366: ; At this point, EAX = # of whole quadpixels across source = # of whole bytes
2367: ; (addresses) across destination, ECX = edge-drawing vector, ESI->prclText
2368:
2369: opaq_set_deltas_and_edge_vector:
2370: mov edi,ulScreenDelta
2371: sub edi,eax ;whole bytes offset to next scan in screen
2372: ; (there are four pixels--one quadpixel--
2373: ; at each VGA address)
2374: mov ulTmpDstDelta,edi
2375:
2376: mov edx,[esi].xLeft
2377: mov edi,[esi].xRight
2378: add edx,3
2379: and edx,not 7
2380: add edi,4
2381: sub edi,edx
2382: shr edi,3 ;width of the text in the temp buffer in bytes,
2383: ; counting bytes containing whole quadpixels
2384: ; but not bytes containing only partial
2385: ; quadpixels. (Remember, text bytes map to
2386: ; quadpixel pairs; text nibbles map to
2387: ; quadpixels)
2388: sub edi,ulBufDelta
2389: neg edi ;EDI = ulBufDelta - width computed above
2390: mov ulTmpSrcDelta,edi ;offset to next scan in source buffer when
2391: ; doing whole quadpixels
2392: opaq_set_edge_vector:
2393: mov pfnEdgeVector,ecx ;save address of partial-quadpixel-
2394: ; drawing code, or end of loop if no
2395: ; partial edge
2396: mov edx,eax ;# of whole quadpixels
2397: mov pfnFirstOpaqVector,offset opaq_whole_quadpixels
2398: ;assume there are whole quadpixels
2399: ; to copy, in which case we'll draw
2400: ; them first, then the partial edge
2401: ; quadpixels
2402: sub edi,edi ;EDI = 0, ready to receive the carry below
2403: shr edx,1 ;# of quadpixels / 2; CF = odd quadpixel
2404: mov ulWholeWidthInQuadpixelPairs,edx ;# of quadpixel pairs to copy (MOV leaves CF intact)
2405: adc edi,edi ;odd quadpixel status
2406: mov ulOddQuadpixel,edi ;1 if there is an odd quadpixel, 0 else
2407: dec edx
2408: mov ulWholeWidthInQuadpixelPairsMinus1,edx
2409: ;# of whole quadpixel pairs to copy,
2410: ; minus 1 (for case with both leading
2411: ; and trailing quadpixels)
2412: cmp eax,0 ;are there any whole quadpixels at all?
2413: jg short @F ;yes, we're all set
2414: ;no, set up for edge(s) only
2415: mov pfnFirstOpaqVector,ecx ;the edges are first and only, because
2416: ; there are no whole quadpixels
2417: @@:
2418:
2419: ;-----------------------------------------------------------------------;
2420: ; Determine the screen offset of the first destination byte.
2421: ;-----------------------------------------------------------------------;
2422:
2423: mov ebx,ppdev
2424: mov eax,ulTopScan ;NOTE(review): ulTopScan is not stored anywhere between opaque_text and here; presumably set earlier in vFastText - confirm
2425: mov ecx,eax ;keep the top scan for the bank checks below
2426: mul ulScreenDelta ;EAX = top scan * screen scan width (EDX is overwritten)
2427: mov edi,[esi].xLeft
2428: shr edi,2 ;left edge screen offset in quadpixels
2429: add edi,eax
2430:
2431: ;-----------------------------------------------------------------------;
2432: ; Map in the bank containing the top scan of the text, if it's not
2433: ; mapped in already.
2434: ;-----------------------------------------------------------------------;
2435:
2436: cmp ecx,[ebx].pdev_rcl1PlanarClip.yTop ;is text top less than
2437: ; current bank?
2438: jl short opaq_map_init_bank ;yes, map in proper bank
2439: cmp ecx,[ebx].pdev_rcl1PlanarClip.yBottom ;text top greater than
2440: ; current bank?
2441: jl short opaq_init_bank_mapped ;no, proper bank already mapped
2442: opaq_map_init_bank:
2443:
2444: ; Map in the bank containing the top scan line of the fill.
2445: ; Preserves EBX, ESI, and EDI.
2446:
2447: ptrCall <dword ptr [ebx].pdev_pfnPlanarControl>,<ebx,ecx,JustifyTop>
2448:
2449: opaq_init_bank_mapped:
2450:
2451: add edi,[ebx].pdev_pvBitmapStart ;initial destination address
2452:
2453: ;-----------------------------------------------------------------------;
2454: ; Load the latches with the background color.
2455: ;-----------------------------------------------------------------------;
2456:
2457: sub eax,eax
2458: mov edx,[esi].xLeft
2459: and edx,011b ;EDX = left edge % 4
2460: cmp eax,edx ;is the first quadpixel a partial
2461: ; quadpixel? (CF=1 if xLeft % 4 != 0)
2462: adc eax,eax ;if so, EAX = 1 (skip it), else EAX = 0
2463: mov edx,iBgColor
2464: mov [edi+eax],dl ;write the bg color to the first full
2465: ; quadpixel, in each of the four planes
2466: mov dl,[edi+eax] ;read back the quadpixel to load the
2467: ; latches with the bg color
2468:
2469: ;-----------------------------------------------------------------------;
2470: ; Set up the VGA's hardware for read mode 0 and write mode 2, the ALUs
2471: ; for XOR, and the Bit Mask to 1 for bits that differ between the fg and
2472: ; bg, 0 for bits that are the same. EBX->ppdev on entry.
2473: ;-----------------------------------------------------------------------;
2474:
2475: mov edx,VGA_BASE + GRAF_ADDR
2476: mov ah,byte ptr [ebx].pdev_ulrm0_wmX[2]
2477: ;write mode 2 setting for Graphics Mode
2478: mov al,GRAF_MODE ;AL = register index, AH = data for the word OUT
2479: out dx,ax ;write mode 2 to expand glyph bits to
2480: ; 0 or 0ffh per plane
2481: mov eax,GRAF_DATA_ROT + (DR_XOR SHL 8)
2482: out dx,ax ;XOR to flip latched data to make ~bg
2483:
2484: mov ah,byte ptr iBgColor
2485: xor ah,byte ptr iFgColor ;AH = bits in which fg and bg differ
2486: mov al,GRAF_BIT_MASK
2487: out dx,ax ;pass through common fg & bg bits
2488: ; unchanged from bg color in latches;
2489: ; non-common bits come from XOR in the
2490: ; ALUs, flipped from the bg to the fg
2491: ; state if the glyph bit for the pixel
2492: ; in that plane is 1, still in bg state
2493: ; if the glyph bit for that plane is 0
2494:
2495: ;-----------------------------------------------------------------------;
2496: ; Main loop for processing fill in each bank.
2497: ;
2498: ; At start of loop and on each loop, EBX->ppdev and EDI->first destination
2499: ; byte. Computes ulNumScans for this bank, then dispatches via pfnFirstOpaqVector.
2500: ;-----------------------------------------------------------------------;
2501:
2502: opaq_bank_loop:
2503: mov pScreen,edi ;remember initial copy destination
2504:
2505: mov edx,ulBottomScan ;bottom of destination rectangle
2506: cmp edx,[ebx].pdev_rcl1PlanarClip.yBottom
2507: ;which comes first, the bottom of the
2508: ; text rect or the bottom of the
2509: ; current bank?
2510: jl short @F ;text bottom comes first, so draw to
2511: ; that; this is the last bank in text
2512: mov edx,[ebx].pdev_rcl1PlanarClip.yBottom
2513: ;bank bottom comes first; draw to
2514: ; bottom of bank
2515: @@:
2516: sub edx,ulTopScan ;# of scans to draw in bank
2517: mov ulNumScans,edx
2518: jmp pfnFirstOpaqVector ;do first sort of drawing (whole
2519: ; bytes, or edge(s) if no whole
2520: ; bytes)
2521:
2522: ;-----------------------------------------------------------------------;
2523: ; Draw the whole quadpixels, handling as many as possible paired into
2524: ; bytes so we can draw 8 pixels at a time.
2525: ;
2526: ; On entry:
2527: ; EDI = first destination byte
2528: ;-----------------------------------------------------------------------;
2529: align 4
2530: opaq_whole_quadpixels:
2531: mov esi,pTempBuffer ;point to first source byte
2532: mov eax,ulTextLeft ;left edge
2533: test eax,011b ;is there a partial (masked) edge?
2534: jz short @f ;no, start addresses are correct
2535: inc edi ;yes, skip over one dest byte for the
2536: ; four pixels in the partial edge
2537: test eax,100b ;do we have a partial left edge in the
2538: ; second quadpixel?
2539: jz short @f ;no, source start address is correct
2540: inc esi ;yes, skip over a source byte because
2541: ; the partial edge is all that's in
2542: ; this byte
2543: @@:
2544: mov ebx,pGlyphFlipTable ;point to the look-up table we'll use
2545: ; to flip the glyph bits into the form
2546: ; required by planar mode
2547: mov edx,ulNumScans ;# of scans to draw
2548:
2549: ;decide which copy loop to use, based
2550: ; on the word-alignment of the dest
2551: ; rectangle with the screen
2552: ;the following tests rely on VGA even
2553: ; addresses being aligned to the start
2554: ; of corresponding source buffer bytes
2555: ; (4-pixel sets at even VGA addresses
2556: ; match up to the upper quadpixels of
2557: ; source buffer bytes)
2558: test edi,1 ;is dest word-aligned?
2559: jnz short opaq_need_leading ;no, need leading quadpixel
2560: ;yes, no leading quadpixel
2561: cmp ulOddQuadpixel,1 ;odd width in quadpixels?
2562: jnz short opaq_scan_loop ;no, no trailing quadpixel
2563: jmp opaq_scan_loop_t ;yes, trailing quadpixel
2564:
2565: align 4
2566: opaq_need_leading: ;there's a leading quadpixel
2567: cmp ulOddQuadpixel,1 ;odd width in quadpixels?
2568: jnz opaq_scan_loop_lt ;no (even), so the leading quadpixel leaves a trailing one too
2569: jmp opaq_scan_loop_l ;yes (odd), the leading quadpixel uses up the odd one; no trailing
2570:
2571:
2572: ;-----------------------------------------------------------------------;
2573: ; Loops for copying whole quadpixels to the screen, as much as possible a
2574: ; quadpixel pair at a time.
2575: ; On entry:
2576: ; EBX = pointer to flip table (BL is overwritten with each glyph byte to index it, so this only indexes correctly if the table is 256-byte aligned)
2577: ; EDX = # of scans to draw
2578: ; ESI = pointer to first buffer byte from which to copy
2579: ; EDI = pointer to first screen byte to which to copy
2580: ; ulTmpSrcDelta = offset to next buffer scan
2581: ; ulTmpDstDelta = offset to next destination (VGA) scan
2582: ; ulWholeWidthInQuadpixelPairs = # of whole bytes to copy
2583: ; ulWholeWidthInQuadpixelPairsMinus1 = # of whole bytes to copy, minus 1
2584: ; LATER could break out and optimize short runs, such as 1, 2, 3, 4 wide.
2585: ;-----------------------------------------------------------------------;
2586:
2587: ;-----------------------------------------------------------------------;
2588: ; Loop for doing whole opaque words: no leading quadpixel, no trailing
2589: ; quadpixel.
2590: ;-----------------------------------------------------------------------;
2591: align 4
2592: opaq_scan_loop:
2593: opaq_sl_row_loop:
2594: mov ecx,ulWholeWidthInQuadpixelPairs
2595: inc ecx ;adjust for word calc
2596: shr ecx,1 ;we'll do a word per loop
2597: ;guaranteed to have at least one word to do, or
2598: ; we wouldn't be here
2599: jnc short opaq_sl_enter_odd ;CF clear = odd pair count: enter mid-loop to do one pair first
2600: opaq_sl_byte_loop:
2601: mov bl,[esi] ;get the next temp buffer byte
2602: mov al,[ebx] ;reverse the order of bits 0-3 and 4-7
2603: inc esi ;point to the next temp buffer byte
2604: mov ah,al
2605: shr al,4 ;first quadpixel to draw in AL, next in AH
2606: mov [edi],ax ;draw the glyph
2607: add edi,2 ;point to the next destination address
2608: opaq_sl_enter_odd:
2609: mov bl,[esi] ;get the next temp buffer byte
2610: mov al,[ebx] ;reverse the order of bits 0-3 and 4-7
2611: inc esi ;point to the next temp buffer byte
2612: mov ah,al
2613: shr al,4 ;first quadpixel to draw in AL, next in AH
2614: mov [edi],ax ;draw the glyph
2615: add edi,2 ;point to the next destination address
2616:
2617: dec ecx ;count down loop passes (two pairs per full pass)
2618: jnz opaq_sl_byte_loop
2619: opaq_sl_whole_done:
2620: add esi,ulTmpSrcDelta ;point to next buffer scan
2621: add edi,ulTmpDstDelta ;point to next screen scan
2622: dec edx ;count down scans
2623: jnz opaq_sl_row_loop
2624: jmp pfnEdgeVector ;do the edge(s)
2625:
2626:
2627: ;-----------------------------------------------------------------------;
2628: ; Loop for doing whole opaque words: leading quadpixel, no trailing
2629: ; quadpixel. Entered with EBX->flip table, EDX = scans, ESI/EDI = source/dest.
2630: ;-----------------------------------------------------------------------;
2631: align 4
2632: opaq_scan_loop_l:
2633: opaq_sll_row_loop:
2634: mov bl,[esi] ;get the first temp buffer byte
2635: inc esi ;point to the next temp buffer byte
2636: mov al,[ebx] ;reverse the order of bits 0-3
2637: mov [edi],al ;draw the first 4 pixels (the leading quadpixel)
2638: inc edi ;point to the next destination address
2639:
2640: mov ecx,ulWholeWidthInQuadpixelPairs
2641: inc ecx ;adjust for word calc
2642: shr ecx,1 ;we'll do a word per loop
2643: jz short opaq_sll_whole_done ;no pairs at all; the leading quadpixel was everything
2644: jnc short opaq_sll_enter_odd ;CF clear = odd pair count: enter mid-loop to do one pair first
2645: opaq_sll_byte_loop:
2646: mov bl,[esi] ;get the next temp buffer byte
2647: mov al,[ebx] ;reverse the order of bits 0-3 and 4-7
2648: inc esi ;point to the next temp buffer byte
2649: mov ah,al
2650: shr al,4 ;first quadpixel to draw in AL, next in AH
2651: mov [edi],ax ;draw the glyph
2652: add edi,2 ;point to the next destination address
2653: opaq_sll_enter_odd:
2654: mov bl,[esi] ;get the next temp buffer byte
2655: mov al,[ebx] ;reverse the order of bits 0-3 and 4-7
2656: inc esi ;point to the next temp buffer byte
2657: mov ah,al
2658: shr al,4 ;first quadpixel to draw in AL, next in AH
2659: mov [edi],ax ;draw the glyph
2660: add edi,2 ;point to the next destination address
2661:
2662: dec ecx ;count down loop passes (two pairs per full pass)
2663: jnz opaq_sll_byte_loop
2664: opaq_sll_whole_done:
2665: add esi,ulTmpSrcDelta ;point to next buffer scan
2666: add edi,ulTmpDstDelta ;point to next screen scan
2667: dec edx ;count down scans
2668: jnz opaq_sll_row_loop
2669: jmp pfnEdgeVector ;do the edge(s)
2670:
2671:
2672: ;-----------------------------------------------------------------------;
2673: ; Loop for doing whole opaque words: leading quadpixel, trailing quadpixel.
2674: ;-----------------------------------------------------------------------;
2675: align 4
2676: opaq_scan_loop_lt:
2677: opaq_sllt_row_loop:
2678: mov bl,[esi] ;get the first temp buffer byte
2679: inc esi ;point to the next temp buffer byte
2680: mov al,[ebx] ;reverse the order of bits 0-3
2681: mov [edi],al ;draw the first 4 pixels (the leading quadpixel)
2682: inc edi ;point to the next destination address
2683:
2684: mov ecx,ulWholeWidthInQuadpixelPairsMinus1 ;one pair is split between the leading and trailing quadpixels
2685: inc ecx ;adjust for word calc
2686: shr ecx,1 ;we'll do a word per loop
2687: jz short opaq_sllt_whole_done ;no full pairs between the leading and trailing quadpixels
2688: jnc short opaq_sllt_enter_odd ;CF clear = odd pair count: enter mid-loop to do one pair first
2689: opaq_sllt_byte_loop:
2690: mov bl,[esi] ;get the next temp buffer byte
2691: mov al,[ebx] ;reverse the order of bits 0-3 and 4-7
2692: inc esi ;point to the next temp buffer byte
2693: mov ah,al
2694: shr al,4 ;first quadpixel to draw in AL, next in AH
2695: mov [edi],ax ;draw the glyph
2696: add edi,2 ;point to the next destination address
2697: opaq_sllt_enter_odd:
2698: mov bl,[esi] ;get the next temp buffer byte
2699: mov al,[ebx] ;reverse the order of bits 0-3 and 4-7
2700: inc esi ;point to the next temp buffer byte
2701: mov ah,al
2702: shr al,4 ;first quadpixel to draw in AL, next in AH
2703: mov [edi],ax ;draw the glyph
2704: add edi,2 ;point to the next destination address
2705:
2706: dec ecx ;count down loop passes (two pairs per full pass)
2707: jnz opaq_sllt_byte_loop
2708: opaq_sllt_whole_done:
2709: mov bl,[esi] ;get the last temp buffer byte
2710: inc esi ;point to the next temp buffer byte
2711: mov al,[ebx] ;reverse the order of bits 4-7
2712: shr eax,4 ;put the quadpixel in bits 0-3 (AL bits 4-7 now hold bits from AH; presumably ignored by the hardware in this write mode - confirm)
2713: mov [edi],al ;draw the last 4 pixels (the trailing quadpixel)
2714: inc edi ;point to the next destination address
2715:
2716: add esi,ulTmpSrcDelta ;point to next buffer scan
2717: add edi,ulTmpDstDelta ;point to next screen scan
2718: dec edx ;count down scans
2719: jnz opaq_sllt_row_loop
2720: jmp pfnEdgeVector ;do the edge(s)
2721:
2722: ;-----------------------------------------------------------------------;
2723: ; Loop for doing whole opaque words: no leading quadpixel, trailing quadpixel.
2724: ;-----------------------------------------------------------------------;
2725: align 4
2726: opaq_scan_loop_t:
2727: opaq_slt_row_loop:
2728: mov ecx,ulWholeWidthInQuadpixelPairs
2729: inc ecx ;adjust for word calc
2730: shr ecx,1 ;we'll do a word per loop
2731: jz short opaq_slt_whole_done ;no full pairs; only the trailing quadpixel to draw
2732: jnc short opaq_slt_enter_odd ;CF clear = odd pair count: enter mid-loop to do one pair first
2733: opaq_slt_byte_loop:
2734: mov bl,[esi] ;get the next temp buffer byte
2735: mov al,[ebx] ;reverse the order of bits 0-3 and 4-7
2736: inc esi ;point to the next temp buffer byte
2737: mov ah,al
2738: shr al,4 ;first quadpixel to draw in AL, next in AH
2739: mov [edi],ax ;draw the glyph
2740: add edi,2 ;point to the next destination address
2741: opaq_slt_enter_odd:
2742: mov bl,[esi] ;get the next temp buffer byte
2743: mov al,[ebx] ;reverse the order of bits 0-3 and 4-7
2744: inc esi ;point to the next temp buffer byte
2745: mov ah,al
2746: shr al,4 ;first quadpixel to draw in AL, next in AH
2747: mov [edi],ax ;draw the glyph
2748: add edi,2 ;point to the next destination address
2749:
2750: dec ecx ;count down loop passes (two pairs per full pass)
2751: jnz opaq_slt_byte_loop
2752: opaq_slt_whole_done:
2753: mov bl,[esi] ;get the last temp buffer byte
2754: inc esi ;point to the next temp buffer byte
2755: mov al,[ebx] ;reverse the order of bits 4-7
2756: shr eax,4 ;put the quadpixel in bits 0-3
2757: mov [edi],al ;draw the last 4 pixels (the trailing quadpixel)
2758: inc edi ;point to the next destination address
2759:
2760: add esi,ulTmpSrcDelta ;point to next buffer scan
2761: add edi,ulTmpDstDelta ;point to next screen scan
2762: dec edx ;count down scans
2763: jnz opaq_slt_row_loop
2764: jmp pfnEdgeVector ;do the edge(s)
2765:
2766: ;-----------------------------------------------------------------------;
2767: ; Draw a partial left edge.
2768: ;-----------------------------------------------------------------------;
2769: align 4
2770: opaq_draw_left_edge_only:
2771:
2772: push offset opaq_edges_done ;return here when done with edge
2773:
2774: opaq_draw_left_edge_only_entry:
2775: mov esi,pTempBuffer ;source start
2776: mov edi,pScreen ;destination (VGA) start
2777: mov ecx,ulLeftEdgeShift ;CL=amount by which to shift byte to
2778: ; right-justify desired quadpixel (0 or
2779: ; 4)
2780: mov eax,ulLeftMask ;clip mask for edge
2781:
2782: ; Enter here to copy a partial edge (left or right), with ESI pointing to the
2783: ; first source byte to copy, EDI pointing to the first dest byte to copy to,
2784: ; CL the amount by which to right-shift to get the quadpixel of interest into
2785: ; bits 0-3, and AL the Map Mask setting to clip the edge (written out below)
2786:
2787: opaq_draw_edge_entry:
2788: push ebp ;preserve stack frame pointer
2789:
2790: mov edx,VGA_BASE + SEQ_DATA ;SEQ_INDEX already points to Map Mask
2791: out dx,al ;set Map Mask to clip this edge
2792:
2793: mov eax,ulNumScans ;height of text
2794: and eax,11b ;# of fractional quadscans to copy
2795: mov edx,ulScreenDelta ;width of a screen scan in addresses
2796: mov ebx,dword ptr OpaqFgEdgeTable[eax*4] ;pick entry from (height AND 3)
2797: push ebx ;entry point into unrolled loop
2798: ;***stack frame unavailable***
2799: mov eax,ulNumScans ;height of text
2800: add eax,3 ;round up to a multiple of four scans
2801: shr eax,2 ;# of whole & partial quadscans to copy
2802: mov ebx,pGlyphFlipTable ;point to the look-up table we'll use
2803: ; to flip the glyph bits into the form
2804: ; required by planar mode
2805: mov ebp,ulBufDelta ;width of a source scan in bytes; EBP is
2806: retn ; no longer the frame pointer. RETN pops the entry pushed above
2807:
2808: align 4
2809: OpaqFgEdgeTable label dword
2810: dd opaq_edge_entry_4 ;(height AND 3) = 0: four scans in first pass
2811: dd opaq_edge_entry_1 ;(height AND 3) = 1: one scan in first pass
2812: dd opaq_edge_entry_2 ;(height AND 3) = 2: two scans in first pass
2813: dd opaq_edge_entry_3 ;(height AND 3) = 3: three scans in first pass
2814:
2815: align 4
2816: opaq_edge_loop:
2817: opaq_edge_entry_4:
2818: mov bl,[esi] ;get the next text buffer byte
2819: shr bl,cl ;move the desired quadpixel into bits 0-3
2820: add esi,ebp ;point to the same byte in the next buffer scan
2821: mov bl,[ebx] ;look up the flipped form (EBX = table + byte)
2822: mov [edi],bl ;draw up to four pixels, with the Map Mask
2823: ; clipping, if necessary
2824: add edi,edx ;point to the same address in the next screen scan
2825: opaq_edge_entry_3:
2826: mov bl,[esi] ;get the next text buffer byte
2827: shr bl,cl ;move the desired quadpixel into bits 0-3
2828: add esi,ebp ;point to the same byte in the next buffer scan
2829: mov bl,[ebx] ;look up the flipped form (EBX = table + byte)
2830: mov [edi],bl ;draw up to four pixels, with the Map Mask
2831: ; clipping, if necessary
2832: add edi,edx ;point to the same address in the next screen scan
2833: opaq_edge_entry_2:
2834: mov bl,[esi] ;get the next text buffer byte
2835: shr bl,cl ;move the desired quadpixel into bits 0-3
2836: add esi,ebp ;point to the same byte in the next buffer scan
2837: mov bl,[ebx] ;look up the flipped form (EBX = table + byte)
2838: mov [edi],bl ;draw up to four pixels, with the Map Mask
2839: ; clipping, if necessary
2840: add edi,edx ;point to the same address in the next screen scan
2841: opaq_edge_entry_1:
2842: mov bl,[esi] ;get the next text buffer byte
2843: shr bl,cl ;move the desired quadpixel into bits 0-3
2844: add esi,ebp ;point to the same byte in the next buffer scan
2845: mov bl,[ebx] ;look up the flipped form (EBX = table + byte)
2846: mov [edi],bl ;draw up to four pixels, with the Map Mask
2847: ; clipping, if necessary
2848: add edi,edx ;point to the same address in the next screen scan
2849:
2850: dec eax ;count down quadscans
2851: jnz opaq_edge_loop
2852:
2853: pop ebp ;restore stack frame pointer
2854: ;***stack frame available***
2855: retn ;back to the CALLer, or to the address pushed by an edge-only entry
2856:
2857: ;-----------------------------------------------------------------------;
2858: ; Draw a partial right edge only. Once we've set up the pointers, this
2859: ; is done with exactly the same code as the left edge.
2860: ;-----------------------------------------------------------------------;
2861: align 4
2862: opaq_draw_right_edge_only:
2863: push offset opaq_edges_done ;return here when done with edge
2864:
2865: opaq_draw_right_edge_only_entry:
2866: mov esi,ulTextWidthInBytesMinus1
2867: add esi,pTempBuffer ;point to right edge start in buffer
2868: mov edi,ulVGAWidthInBytesMinus1
2869: add edi,pScreen ;point to right edge start in screen
2870: mov ecx,ulRightEdgeShift ;CL=amount by which to shift byte to
2871: ; right-justify desired quadpixel (0 or
2872: ; 4)
2873: mov eax,ulRightMask ;clip mask for edge
2874:
2875: jmp opaq_draw_edge_entry
2876:
2877: ;-----------------------------------------------------------------------;
2878: ; Draw both left and right partial edges. We do this by calling first
2879: ; the left and then the right edge drawing code.
2880: ;-----------------------------------------------------------------------;
2881: align 4
2882: opaq_draw_both_edges:
2883: call opaq_draw_left_edge_only_entry
2884: call opaq_draw_right_edge_only_entry
2885:
2886: ;-----------------------------------------------------------------------;
2887: ; Restore Map Mask to enable all planes, now that we're done drawing
2888: ; partial edges.
2889: ;-----------------------------------------------------------------------;
2890:
2891: opaq_edges_done:
2892: mov al,MM_ALL ;Map Mask value that enables all planes
2893: mov edx,VGA_BASE + SEQ_DATA ;SEQ_INDEX already points to Map Mask
2894: out dx,al ;set Map Mask to enable all planes again
2895:
2896: ;-----------------------------------------------------------------------;
2897: ; See if there are more banks to draw.
2898: ;-----------------------------------------------------------------------;
2899:
2900: opaq_check_more_banks:
2901: mov ebx,ppdev
2902: mov eax,[ebx].pdev_rcl1PlanarClip.yBottom ;is the text bottom in
2903: cmp ulBottomScan,eax ; the current bank?
2904: jnle short opaq_do_next_bank ;no, do the next bank
2905: ;yes, so we're done
2906:
2907: ;-----------------------------------------------------------------------;
2908: ; Restore the VGA's hardware to the default state.
2909: ; The Graphics Controller Index still points to the Bit Mask at this
2910: ; point.
2911: ;-----------------------------------------------------------------------;
2912:
2913: mov edx,VGA_BASE + GRAF_DATA
2914: mov al,0ffh
2915: out dx,al ;enable all bits through the Bit Mask
2916:
2917: mov esi,ppdev
2918: dec edx ;point back to the Graphics Index reg
2919: mov ah,byte ptr [esi].pdev_ulrm0_wmX[0]
2920: ;write mode 0 setting for Graphics Mode
2921: mov al,GRAF_MODE
2922: out dx,ax ;write mode 0, read mode 0
2923:
2924: mov eax,GRAF_DATA_ROT + (DR_SET SHL 8)
2925: out dx,ax ;replace mode, no rotate
2926:
2927: cRet vFastText
2928:
2929:
2930: align 4
2931: opaq_do_next_bank:
2932: mov esi,prclText
2933: mov ulTopScan,eax ;this will be the top of the next bank
2934: mov ecx,eax
2935: mul ulScreenDelta
2936: mov edi,[esi].xLeft
2937: shr edi,2 ;convert from pixels to quadpixels
2938: add edi,eax ;next screen byte to which to copy
2939:
2940: ptrCall <dword ptr [ebx].pdev_pfnPlanarControl>,<ebx,ecx,JustifyTop>
2941: ;map in the bank (call preserves EBX,
2942: ; ESI, and EDI)
2943:
2944: add edi,[ebx].pdev_pvBitmapStart ;initial destination address
2945:
2946: mov eax,ulBufDelta
2947: mul ulNumScans
2948: add pTempBuffer,eax ;advance to next temp buffer scan to
2949: ; copy
2950:
2951: jmp opaq_bank_loop ;we're ready to draw in the new bank
2952:
2953: ;-----------------------------------------------------------------------;
2954: ; Special 8-wide aligned opaque drawing code. Loads the latches with the
2955: ; background color, sets the Bit Mask to 1 for bits that differ between
2956: ; the foreground and background, sets the ALUs to XOR, then uses write
2957: ; mode 2 to draw the glyphs. Joyously, there are no partial bytes to
2958: ; worry about, so we can really crank up the code.
2959: ;
2960: ; On entry:
2961: ; EBX = prclText
2962: ;-----------------------------------------------------------------------;
2963: align 4
2964: special_8_wide_aligned_opaque:
2965:
2966: mov esi,ppdev
2967: mov edi,[ebx].yBottom
2968: mov eax,[ebx].yTop
2969: sub edi,eax ;height of glyphs
2970:
2971: ;-----------------------------------------------------------------------;
2972: ; Map in the bank containing the top scan of the text, if it's not
2973: ; mapped in already.
2974: ;-----------------------------------------------------------------------;
2975:
2976: cmp eax,[esi].pdev_rcl1PlanarClip.yTop ;is text top less than
2977: ; current bank?
2978: jl short s8wao_map_init_bank ;yes, map in proper bank
2979: cmp eax,[esi].pdev_rcl1PlanarClip.yBottom ;text top greater than
2980: ; current bank?
2981: jl short s8wa0_init_bank_mapped ;no, proper bank already mapped
2982: s8wao_map_init_bank:
2983:
2984: ; Map in the bank containing the top scan line of the text, making sure we're
2985: ; in planar mode at the same time.
2986: ; Preserves EBX, ESI, and EDI.
2987:
2988: ptrCall <dword ptr [esi].pdev_pfnPlanarControl>,<esi,eax,JustifyTop>
2989:
2990: s8wa0_init_bank_mapped:
2991:
2992: ;-----------------------------------------------------------------------;
2993: ; We handle only cases where the text lies entirely in one bank.
2994: ; LATER handle broken rasters and/or bank-spanning cases?
2995: ;-----------------------------------------------------------------------;
2996:
2997: mov eax,[esi].pdev_rcl1PlanarClip.yBottom
2998: sub eax,[ebx].yTop ;maximum run in bank
2999: cmp edi,eax ;does all the text fit in the bank?
3000: jg general_handler ;no, let general code handle it
3001:
3002: ;-----------------------------------------------------------------------;
3003: ; Set up variables.
3004: ;-----------------------------------------------------------------------;
3005:
3006: mov eax,edi ;# of scans in glyphs
3007: add eax,7
3008: shr eax,3 ;# of unrolled loops needed to draw
3009: ; glyph scans
3010: mov ulUnrolledCount,eax ;# of unrolled loop reps
3011: and edi,111b ;# of odd scans in first unrolled
3012: ; loop
3013: mov ulUnrolledOddCount,edi ;# of odd unrolled loop reps
3014:
3015: ;-----------------------------------------------------------------------;
3016: ; Point to the first screen byte at which to draw.
3017: ;-----------------------------------------------------------------------;
3018:
3019: mov eax,[ebx].yTop
3020: mul [esi].pdev_lPlanarNextScan
3021: mov edi,[ebx].xLeft
3022: shr edi,2
3023: add edi,eax ;next screen byte to which to copy
3024: add edi,[esi].pdev_pvBitmapStart ;initial destination address
3025: mov pScreen,edi
3026:
3027: ;-----------------------------------------------------------------------;
3028: ; Load the latches with the background color.
3029: ;-----------------------------------------------------------------------;
3030:
3031: mov eax,iBgColor
3032: mov byte ptr [edi],al ;write the bg color to the first byte
3033: mov al,[edi] ;read back the byte to load the
3034: ; latches with the bg color
3035:
3036: ;-----------------------------------------------------------------------;
3037: ; Set up the VGA's hardware for read mode 0 and write mode 2, the ALUs
3038: ; for XOR, and the Bit Mask to 1 for bits that differ between the fg and
3039: ; bg, 0 for bits that are the same.
3040: ;-----------------------------------------------------------------------;
3041:
3042: mov edx,VGA_BASE + GRAF_ADDR
3043: mov ah,byte ptr [esi].pdev_ulrm0_wmX[2]
3044: ;write mode 2 setting for Graphics Mode
3045: mov al,GRAF_MODE
3046: out dx,ax ;write mode 2 to expand glyph bits to
3047: ; 0 or 0ffh per plane
3048: mov eax,GRAF_DATA_ROT + (DR_XOR SHL 8)
3049: out dx,ax ;XOR to flip latched data to make ~bg
3050:
3051: mov ah,byte ptr iBgColor
3052: xor ah,byte ptr iFgColor
3053: mov al,GRAF_BIT_MASK
3054: out dx,ax ;pass through common fg & bg bits
3055: ; unchanged from bg color in latches;
3056: ; non-common bits come from XOR in the
3057: ; ALUs, flipped from the bg to the fg
3058: ; state if the glyph bit for the pixel
3059: ; in that plane is 1, still in bg state
3060: ; if the glyph bit for that plane is 0
3061:
3062: ;-----------------------------------------------------------------------;
3063: ; Set up the screen scan offset in EDX.
3064: ;-----------------------------------------------------------------------;
3065:
3066: mov edx,[esi].pdev_lPlanarNextScan ;offset from one scan to next
3067:
3068: mov ecx,ulGlyphCount
3069: mov eax,ulUnrolledOddCount ;# of odd unrolled loop reps
3070: mov eax,dword ptr s8wao_byte_table[eax*4] ;entry point into
3071: mov pGlyphLoop,eax ; unrolled loop
3072:
3073: s8wao_glyph_loop:
3074: mov ebx,pGlyphPos ;point to the current glyph to draw
3075: add pGlyphPos,(size GLYPHPOS) ;point to the next glyph
3076: mov edi,pScreen ;point to current glyph's screen
3077: ; location
3078: add pScreen,2 ;point to the next glyph's screen
3079: ; location
3080: mov esi,[ebx].gp_pgdf ;point to current glyph def
3081: mov esi,[esi].gdf_pgb ;point to current glyph
3082: add esi,gb_aj ;point to the current glyph's bits
3083: mov ecx,ulUnrolledCount ;# of unrolled loop reps
3084: mov ebx,pGlyphFlipTable ;point to the look-up table we'll use
3085: ; to flip the glyph bits into the form
3086: ; required by planar mode
3087: jmp pGlyphLoop ;branch into unrolled loop
3088:
3089: align 4
3090: s8wao_byte_table label dword
3091: dd s8wao_byte_8
3092: dd s8wao_byte_1
3093: dd s8wao_byte_2
3094: dd s8wao_byte_3
3095: dd s8wao_byte_4
3096: dd s8wao_byte_5
3097: dd s8wao_byte_6
3098: dd s8wao_byte_7
3099:
3100: align 4
3101: s8wao_byte_loop:
3102: s8wao_byte_8:
3103: mov bl,[esi] ;get the next glyph byte
3104: mov al,[ebx] ;reverse the order of bits 0-3 and 4-7
3105: inc esi ;point to the next glyph byte
3106: mov ah,al
3107: shr al,4 ;first quadpixel to draw in AL, next in AH
3108: mov [edi],ax ;draw the glyph
3109: add edi,edx ;point to the next destination byte
3110: s8wao_byte_7:
3111: mov bl,[esi] ;get the next glyph byte
3112: mov al,[ebx] ;reverse the order of bits 0-3 and 4-7
3113: inc esi ;point to the next glyph byte
3114: mov ah,al
3115: shr al,4 ;first quadpixel to draw in AL, next in AH
3116: mov [edi],ax ;draw the glyph
3117: add edi,edx ;point to the next destination byte
3118: s8wao_byte_6:
3119: mov bl,[esi] ;get the next glyph byte
3120: mov al,[ebx] ;reverse the order of bits 0-3 and 4-7
3121: inc esi ;point to the next glyph byte
3122: mov ah,al
3123: shr al,4 ;first quadpixel to draw in AL, next in AH
3124: mov [edi],ax ;draw the glyph
3125: add edi,edx ;point to the next destination byte
3126: s8wao_byte_5:
3127: mov bl,[esi] ;get the next glyph byte
3128: mov al,[ebx] ;reverse the order of bits 0-3 and 4-7
3129: inc esi ;point to the next glyph byte
3130: mov ah,al
3131: shr al,4 ;first quadpixel to draw in AL, next in AH
3132: mov [edi],ax ;draw the glyph
3133: add edi,edx ;point to the next destination byte
3134: s8wao_byte_4:
3135: mov bl,[esi] ;get the next glyph byte
3136: mov al,[ebx] ;reverse the order of bits 0-3 and 4-7
3137: inc esi ;point to the next glyph byte
3138: mov ah,al
3139: shr al,4 ;first quadpixel to draw in AL, next in AH
3140: mov [edi],ax ;draw the glyph
3141: add edi,edx ;point to the next destination byte
3142: s8wao_byte_3:
3143: mov bl,[esi] ;get the next glyph byte
3144: mov al,[ebx] ;reverse the order of bits 0-3 and 4-7
3145: inc esi ;point to the next glyph byte
3146: mov ah,al
3147: shr al,4 ;first quadpixel to draw in AL, next in AH
3148: mov [edi],ax ;draw the glyph
3149: add edi,edx ;point to the next destination byte
3150: s8wao_byte_2:
3151: mov bl,[esi] ;get the next glyph byte
3152: mov al,[ebx] ;reverse the order of bits 0-3 and 4-7
3153: inc esi ;point to the next glyph byte
3154: mov ah,al
3155: shr al,4 ;first quadpixel to draw in AL, next in AH
3156: mov [edi],ax ;draw the glyph
3157: add edi,edx ;point to the next destination byte
3158: s8wao_byte_1:
3159: mov bl,[esi] ;get the next glyph byte
3160: mov al,[ebx] ;reverse the order of bits 0-3 and 4-7
3161: inc esi ;point to the next glyph byte
3162: mov ah,al
3163: shr al,4 ;first quadpixel to draw in AL, next in AH
3164: mov [edi],ax ;draw the glyph
3165: add edi,edx ;point to the next destination byte
3166:
3167: dec ecx ;count down glyph scans
3168: jnz s8wao_byte_loop
3169:
3170: dec ulGlyphCount ;count down glyphs
3171: jnz s8wao_glyph_loop
3172:
3173: ;-----------------------------------------------------------------------;
3174: ; Restore the VGA's hardware to the default state.
3175: ; The Graphics Controller Index still points to the Bit Mask at this
3176: ; point.
3177: ;-----------------------------------------------------------------------;
3178:
3179: mov edx,VGA_BASE + GRAF_DATA
3180: mov al,0ffh
3181: out dx,al ;enable all bits through the Bit Mask
3182:
3183: mov esi,ppdev
3184: dec edx ;point back to the Graphics Index reg
3185: mov ah,byte ptr [esi].pdev_ulrm0_wmX[0]
3186: ;write mode 0 setting for Graphics Mode
3187: mov al,GRAF_MODE
3188: out dx,ax ;write mode 0, read mode 0
3189:
3190: mov eax,GRAF_DATA_ROT + (DR_SET SHL 8)
3191: out dx,ax ;replace mode, no rotate
3192:
3193: draw_prop_done:
3194: cRet vFastText
3195:
3196: endProc vFastText
3197:
3198: ;-----------------------------------------------------------------------;
3199: ; VOID vSetWriteModes(ULONG * pulWriteModes);
3200: ;
3201: ; Sets the four bytes at *pulWriteModes to the values to be written to
3202: ; the Graphics Mode register to select read mode 0 and:
3203: ; write mode 0, write mode 1, write mode 2, and write mode 3,
3204: ; respectively.
3205: ;
3206: ; Must already be in graphics mode when this is called.
3207: ;-----------------------------------------------------------------------;
3208:
3209: cProc vSetWriteModes,4,< \
3210: pulWriteModes:ptr >
3211:
3212: mov edx,VGA_BASE + GRAF_ADDR ;Graphics Controller Index port
3213: mov al,GRAF_MODE
3214: out dx,al ;point the GC Index to the Graphics Mode reg
3215: inc edx ;point to the GC Data reg
3216: in al,dx ;get the current setting of the Graphics Mode
3217: and eax,0fch ;mask off the write mode field (bits 0-1) and the
3218: mov ah,al ; rest of EAX; bits 2-7 stay as read. Copy to byte 1
3219: mov edx,eax ;duplicate the two low bytes...
3220: shl edx,16 ;...into the two high bytes
3221: or eax,edx ;put the Graphics Mode setting in all 4 bytes
3222: mov edx,pulWriteModes ;the mode values go here
3223: or eax,03020100h ;insert write modes 0, 1, 2, 3 into bytes 0, 1, 2, 3
3224: mov [edx],eax ;store the Graphics Mode settings
3225:
3226: cRet vSetWriteModes
3227:
3228: endProc vSetWriteModes
3229:
3230: ;-----------------------------------------------------------------------;
3231: ; VOID vClearMemDword(PULONG * pulBuffer, ULONG ulDwordCount);
3232: ; Clears (zeroes) ulDwordCount dwords starting at pulBuffer. Args are read
3233: ; off ESP after the one PUSH EDI (assumes cProc pushes nothing -- confirm).
3234: ;-----------------------------------------------------------------------;
3235:
3236: pulBuffer equ [esp+8]
3237: ulDwordCount equ [esp+12]
3238:
3239: cProc vClearMemDword,8,<>
3240:
3241: push edi ;preserve EDI, used as the STOSD destination
3242: mov edi,pulBuffer ;EDI = first dword to clear
3243: mov ecx,ulDwordCount ;ECX = # of dwords to clear
3244: sub eax,eax ;EAX = 0, the value to store
3245: rep stosd ;store ECX zero dwords at [EDI]; the direction flag is
3246: pop edi ; not set here, so it must already be clear. Restore EDI
3247:
3248: cRet vClearMemDword
3249:
3250: endProc vClearMemDword
3251:
3252: public general_handler
3253: public draw_f_tb_no_to_temp_start
3254: public draw_nf_tb_no_to_temp_start
3255: public draw_to_temp_start_entry
3256: public draw_f_ntb_o_to_temp_start
3257: public draw_nf_ntb_o_to_temp_start
3258: public draw_to_temp_start_entry2
3259: public draw_f_tb_no_to_temp_loop
3260: public draw_nf_tb_no_to_temp_loop
3261: public draw_to_temp_loop_entry
3262: public draw_f_ntb_o_to_temp_loop
3263: public draw_nf_ntb_o_to_temp_loop
3264: public draw_to_temp_loop_entry2
3265: public or_all_1_wide_rotated_need_last
3266: public or_all_1_wide_rotated_no_last
3267: public or_first_1_wide_rotated_need_last
3268: public or_first_1_wide_rotated_no_last
3269: public or_first_1_wide_rotated_loop
3270: public mov_first_1_wide_rotated_need_last
3271: public mov_first_1_wide_rotated_no_last
3272: public mov_first_1_wide_rotated_loop
3273: public mov_first_1_wide_unrotated
3274: public mov_first_1_wide_unrotated_loop
3275: public or_all_1_wide_unrotated
3276: public or_all_1_wide_unrotated_loop
3277: public or_first_2_wide_rotated_need_last
3278: public or_first_2_wide_rotated_need_loop
3279: public or_all_2_wide_rotated_need_last
3280: public or_all_2_wide_rotated_need_loop
3281: public mov_first_2_wide_rotated_need_last
3282: public mov_first_2_wide_rotated_need_loop
3283: public or_first_2_wide_rotated_no_last
3284: public or_first_2_wide_rotated_loop
3285: public or_all_2_wide_rotated_no_last
3286: public or_all_2_wide_rotated_loop
3287: public mov_first_2_wide_rotated_no_last
3288: public mov_first_2_wide_rotated_loop
3289: public mov_first_2_wide_unrotated
3290: public mov_first_2_wide_unrotated_loop
3291: public or_all_2_wide_unrotated
3292: public or_all_2_wide_unrotated_loop
3293: public or_first_3_wide_rotated_need_last
3294: public or_all_3_wide_rotated_need_last
3295: public mov_first_3_wide_rotated_need_last
3296: public or_first_3_wide_rotated_no_last
3297: public or_all_3_wide_rotated_no_last
3298: public mov_first_3_wide_rotated_no_last
3299: public mov_first_3_wide_unrotated
3300: public or_all_3_wide_unrotated
3301: public or_first_4_wide_rotated_need_last
3302: public or_all_4_wide_rotated_need_last
3303: public mov_first_4_wide_rotated_need_last
3304: public or_first_4_wide_rotated_no_last
3305: public or_all_4_wide_rotated_no_last
3306: public mov_first_4_wide_rotated_no_last
3307: public mov_first_4_wide_unrotated
3308: public or_all_4_wide_unrotated
3309: public or_first_N_wide_rotated_need_last
3310: public or_all_N_wide_rotated_need_last
3311: public mov_first_N_wide_rotated_need_last
3312: public or_first_N_wide_rotated_no_last
3313: public or_all_N_wide_rotated_no_last
3314: public mov_first_N_wide_rotated_no_last
3315: public mov_first_N_wide_unrotated
3316: public odd_width
3317: public two_odd_bytes
3318: public three_odd_bytes
3319: public or_all_N_wide_unrotated
3320: public or_no_odd_bytes_loop
3321: public or_odd_width
3322: public or_one_odd_bytes_loop
3323: public or_two_odd_bytes
3324: public or_two_odd_bytes_loop
3325: public or_three_odd_bytes
3326: public or_three_odd_bytes_loop
3327: public draw_to_screen
3328: public opaque_text
3329: public opaq_left_edge_solid
3330: public opaq_set_edge_vector
3331: public opaq_map_init_bank
3332: public opaq_init_bank_mapped
3333: public opaq_bank_loop
3334: public opaq_whole_quadpixels
3335: public opaq_scan_loop
3336: public opaq_scan_loop_l
3337: public opaq_scan_loop_lt
3338: public opaq_scan_loop_t
3339: public opaq_draw_left_edge_only
3340: public opaq_draw_left_edge_only_entry
3341: public opaq_edge_loop
3342: public opaq_edge_entry_4
3343: public opaq_edge_entry_3
3344: public opaq_edge_entry_2
3345: public opaq_edge_entry_1
3346: public opaq_draw_right_edge_only
3347: public opaq_draw_right_edge_only_entry
3348: public opaq_draw_both_edges
3349: public opaq_check_more_banks
3350: public opaq_do_next_bank
3351: public special_8_wide_aligned_opaque
3352: public s8wa0_init_bank_mapped
3353: public s8wao_byte_loop
3354: public s8wao_byte_8
3355: public s8wao_byte_7
3356: public s8wao_byte_6
3357: public s8wao_byte_5
3358: public s8wao_byte_4
3359: public s8wao_byte_3
3360: public s8wao_byte_2
3361: public s8wao_byte_1
3362: public s8wao_map_init_bank
3363: public xpar_map_init_bank
3364: public xpar_init_bank_mapped
3365: public xpar_bank_loop
3366: public xpar_scan_loop
3367: public xpar_scan_done
3368: public do_next_xpar_bank
3369: public xpar_high_nibble_F
3370: public xpar_high_nibble_E
3371: public xpar_high_nibble_D
3372: public xpar_high_nibble_C
3373: public xpar_high_nibble_B
3374: public xpar_high_nibble_8
3375: public xpar_high_nibble_6
3376: public xpar_high_nibble_5
3377: public xpar_high_nibble_4
3378: public xpar_high_nibble_7
3379: public xpar_high_nibble_3
3380: public xpar_high_nibble_A
3381: public xpar_high_nibble_2
3382: public xpar_high_nibble_9
3383: public xpar_high_nibble_1
3384: public xpar_high_nibble_0
3385: public xpar_low_nibble_F
3386: public xpar_low_nibble_E
3387: public xpar_low_nibble_D
3388: public xpar_low_nibble_C
3389: public xpar_low_nibble_B
3390: public xpar_low_nibble_8
3391: public xpar_low_nibble_6
3392: public xpar_low_nibble_5
3393: public xpar_low_nibble_4
3394: public xpar_low_nibble_7
3395: public xpar_low_nibble_3
3396: public xpar_low_nibble_A
3397: public xpar_low_nibble_2
3398: public xpar_low_nibble_9
3399: public xpar_low_nibble_1
3400: public xpar_low_nibble_0
3401:
3402:
3403: end
3404:
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.