|
|
1.1 root 1: ;---------------------------Module-Header------------------------------;
2: ; Module Name: lines.asm
3: ;
4: ; Draws a set of connected polylines.
5: ;
6: ; The actual pixel-lighting code is different depending on if the lines
7: ; are styled/unstyled and we're doing an arbitrary ROP or set-style ROP.
8: ;
9: ; Lines are drawn from left to right. So if a line moves from right
10: ; to left, the endpoints are swapped and the line is drawn from left to
11: ; right.
12: ;
13: ; See s3\lines.cxx for a portable version (sans simple clipping).
14: ;
15: ; Copyright (c) 1992 Microsoft Corporation
16: ;-----------------------------------------------------------------------;
17:
18: .386
19:
20: .model small,c
21:
22: assume cs:FLAT,ds:FLAT,es:FLAT,ss:FLAT
23: assume fs:nothing,gs:nothing
24:
25: .xlist
26: include stdcall.inc ;calling convention cmacros
27: include i386\egavga.inc
28: include i386\strucs.inc
29: include i386\driver.inc
30: include i386\lines.inc
31: .list
32:
33: .data
34:
35: public gaflRoundTable ; rounding flags that bLines ORs into fl once the flips are known
36: gaflRoundTable label dword ; indexed by (fl AND FL_ROUND_MASK), used directly as a byte offset
37: dd FL_H_ROUND_DOWN + FL_V_ROUND_DOWN ; no flips
38: dd FL_H_ROUND_DOWN + FL_V_ROUND_DOWN ; D flip
39: dd FL_H_ROUND_DOWN ; V flip
40: dd FL_V_ROUND_DOWN ; D & V flip
41: dd FL_V_ROUND_DOWN ; slope one
42: dd 0baadf00dh ; filler -- presumably slope one & D flip; the flip code sets only one of the two (TODO confirm)
43: dd FL_H_ROUND_DOWN ; slope one & V flip
44: dd 0baadf00dh ; filler -- presumably slope one & D & V flip (TODO confirm)
45:
46: .code
47:
48: ;--------------------------------Macro----------------------------------;
49: ; testb ebx, <mask>
50: ;
51: ; Substitutes a byte test if the mask is entirely in the lo-byte or
52: ; hi-byte (thus saving 3 bytes of code space).
53: ;
54: ;-----------------------------------------------------------------------;
55:
56: TESTB macro targ,mask,thirdarg ; emits the shortest TEST that checks the given flag bits
57: local mask2,delta ; mask2 = working copy of the mask, delta = which byte register to use
58:
59: ifnb <thirdarg> ; a third argument means the mask was not passed as one bracketed argument
60: .err TESTB mask must be enclosed in brackets!
61: endif
62:
63: delta = 0 ; 0 selects bl (lo-byte), 1 selects bh (hi-byte)
64: mask2 = mask
65:
66: if mask2 AND 0ffff0000h
67: test targ,mask ; If bit set in hi-word,
68: exitm ; test entire dword
69: endif
70:
71: if mask2 AND 0ff00h
72: if mask2 AND 0ffh ; If bit set in lo-byte and
73: test targ,mask ; hi-byte, test entire dword
74: exitm
75: endif
76:
77: mask2 = mask2 SHR 8 ; mask is only in the hi-byte: reduce it to an 8-bit immediate
78: delta = 1
79: endif
80:
81: ifidni <targ>,<EBX> ; the byte form is only generated for ebx (via bl/bh)
82: if delta
83: test bh,mask2
84: else
85: test bl,mask2
86: endif
87: exitm
88: endif
89:
90: .err Too bad TESTB doesn't support targets other than ebx!
91: endm
92:
93: ;---------------------------Public-Routine------------------------------;
94: ; BOOL bLines(ppdev, pptfxFirst, pptfxBuf, prun, cptfx, pls,
95: ; prclClip, apfn[], flStart)
96: ;
97: ; Do all the DDA calculations for lines.
98: ;
99: ; Doing Lines Right
100: ; -----------------
101: ;
102: ; In NT, all lines are given to the device driver in fractional
103: ; coordinates, in a 28.4 fixed point format. The lower 4 bits are
104: ; fractional for sub-pixel positioning.
105: ;
106: ; Note that you CANNOT! just round the coordinates to integers
107: ; and pass the results to your favorite integer Bresenham routine!!
108: ; (Unless, of course, you have such a high resolution device that
109: ; nobody will notice -- not likely for a display device.) The
110: ; fractions give a more accurate rendering of the line -- this is
111: ; important for things like our Bezier curves, which would have 'kinks'
112: ; if the points in its polyline approximation were rounded to integers.
113: ;
114: ; Unfortunately, for fractional lines there is more setup work to do
115: ; a DDA than for integer lines. However, the main loop is exactly
116: ; the same (and can be done entirely with 32 bit math).
117: ;
118: ; If You've Got Hardware That Does Bresenham
119: ; ------------------------------------------
120: ;
121: ; A lot of hardware limits DDA error terms to 'n' bits. With fractional
122: ; coordinates, 4 bits are given to the fractional part, letting
123: ; you draw in hardware only those lines that lie entirely in a 2^(n-4)
124: ; by 2^(n-4) pixel space.
125: ;
126: ; And you still have to correctly draw those lines with coordinates
127: ; outside that space! Remember that the screen is only a viewport
128: ; onto a 28.4 by 28.4 space -- if any part of the line is visible
129: ; you MUST render it precisely, regardless of where the end points lie.
130: ; So even if you do it in software, somewhere you'll have to have a
131: ; 32 bit DDA routine.
132: ;
133: ; Our Implementation
134: ; ------------------
135: ;
136: ; We employ a run length slice algorithm: our DDA calculates the
137: ; number of pixels that are in each row (or 'strip') of pixels.
138: ;
139: ; We've separated the running of the DDA and the drawing of pixels:
140: ; we run the DDA for several iterations and store the results in
141: ; a 'strip' buffer (which are the lengths of consecutive pixel rows of
142: ; the line), then we crank up a 'strip drawer' that will draw all the
143: ; strips in the buffer.
144: ;
145: ; We also employ a 'half-flip' to reduce the number of strip
146: ; iterations we need to do in the DDA and strip drawing loops: when a
147: ; (normalized) line's slope is more than 1/2, we do a final flip
148: ; about the line y = (1/2)x. So now, instead of each strip being
149: ; consecutive horizontal or vertical pixel rows, each strip is composed
150: ; of those pixels aligned in 45 degree rows. So a line like (0, 0) to
151: ; (128, 128) would generate only one strip.
152: ;
153: ; We also always draw only left-to-right.
154: ;
155: ; Style lines may have arbitrary style patterns. We specially
156: ; optimize the default patterns (and call them 'masked' styles).
157: ;
158: ; The DDA Derivation
159: ; ------------------
160: ;
161: ; Here is how I like to think of the DDA calculation.
162: ;
163: ; We employ Knuth's "diamond rule": rendering a one-pixel-wide line
164: ; can be thought of as dragging a one-pixel-wide by one-pixel-high
165: ; diamond along the true line. Pixel centers lie on the integer
166: ; coordinates, and so we light any pixel whose center gets covered
167: ; by the "drag" region (John D. Hobby, Journal of the Association
168: ; for Computing Machinery, Vol. 36, No. 2, April 1989, pp. 209-229).
169: ;
170: ; We must define which pixel gets lit when the true line falls
171: ; exactly half-way between two pixels. In this case, we follow
172: ; the rule: when two pels are equidistant, the upper or left pel
173: ; is illuminated, unless the slope is exactly one, in which case
174: ; the upper or right pel is illuminated. (So we make the edges
175: ; of the diamond exclusive, except for the top and left vertices,
176: ; which are inclusive, unless we have slope one.)
177: ;
178: ; This metric decides what pixels should be on any line BEFORE it is
179: ; flipped around for our calculation. Having a consistent metric
180: ; this way will let our lines blend nicely with our curves. The
181: ; metric also dictates that we will never have one pixel turned on
182: ; directly above another that's turned on. We will also never have
183: ; a gap; i.e., there will be exactly one pixel turned on for each
184: ; column between the start and end points. All that remains to be
185: ; done is to decide how many pixels should be turned on for each row.
186: ;
187: ; So lines we draw will consist of varying numbers of pixels on
188: ; successive rows, for example:
189: ;
190: ; ******
191: ; *****
192: ; ******
193: ; *****
194: ;
195: ; We'll call each set of pixels on a row a "strip".
196: ;
197: ; (Please remember that our coordinate space has the origin as the
198: ; upper left pixel on the screen; positive y is down and positive x
199: ; is right.)
200: ;
201: ; Device coordinates are specified as fixed point 28.4 numbers,
202: ; where the first 28 bits are the integer coordinate, and the last
203: ; 4 bits are the fraction. So coordinates may be thought of as
204: ; having the form (x, y) = (M/F, N/F) where F is the constant scaling
205: ; factor F = 2^4 = 16, and M and N are 32 bit integers.
206: ;
207: ; Consider the line from (M0/F, N0/F) to (M1/F, N1/F) which runs
208: ; left-to-right and whose slope is in the first octant, and let
209: ; dM = M1 - M0 and dN = N1 - N0. Then dM >= 0, dN >= 0 and dM >= dN.
210: ;
211: ; Since the slope of the line is less than 1, the edges of the
212: ; drag region are created by the top and bottom vertices of the
213: ; diamond. At any given pixel row y of the line, we light those
214: ; pixels whose centers are between the left and right edges.
215: ;
216: ; Let mL(n) denote the line representing the left edge of the drag
217: ; region. On pixel row j, the column of the first pixel to be
218: ; lit is
219: ;
220: ; iL(j) = ceiling( mL(j * F) / F)
221: ;
222: ; Since the line's slope is less than one:
223: ;
224: ; iL(j) = ceiling( mL([j + 1/2] F) / F )
225: ;
226: ; Recall the formula for our line:
227: ;
228: ; n(m) = (dN / dM) (m - M0) + N0
229: ;
230: ; m(n) = (dM / dN) (n - N0) + M0
231: ;
232: ; Since the line's slope is less than one, the line representing
233: ; the left edge of the drag region is the original line offset
234: ; by 1/2 pixel in the y direction:
235: ;
236: ; mL(n) = (dM / dN) (n - F/2 - N0) + M0
237: ;
238: ; From this we can figure out the column of the first pixel that
239: ; will be lit on row j, being careful of rounding (if the left
240: ; edge lands exactly on an integer point, the pixel at that
241: ; point is not lit because of our rounding convention):
242: ;
243: ; iL(j) = floor( mL(j F) / F ) + 1
244: ;
245: ; = floor( ((dM / dN) (j F - F/2 - N0) + M0) / F ) + 1
246: ;
247: ; = floor( ( F dM j - F/2 dM - N0 dM + dN M0 ) / F dN ) + 1
248: ;
249: ; F dM j - [ dM (N0 + F/2) - dN M0 ]
250: ; = floor( ---------------------------------- ) + 1
251: ; F dN
252: ;
253: ; dM j - [ dM (N0 + F/2) - dN M0 ] / F
254: ; = floor( ------------------------------------ ) + 1 (1)
255: ; dN
256: ;
257: ; = floor( (dM j + alpha) / dN ) + 1
258: ;
259: ; where
260: ;
261: ; alpha = - [ dM (N0 + F/2) - dN M0 ] / F
262: ;
263: ; We use equation (1) to calculate the DDA: there are iL(j+1) - iL(j)
264: ; pixels in row j. Because we are always calculating iL(j) for
265: ; integer quantities of j, we note that the only fractional term
266: ; is constant, and so we can 'throw away' the fractional bits of
267: ; alpha:
268: ;
269: ; beta = floor( - [ dM (N0 + F/2) - dN M0 ] / F ) (2)
270: ;
271: ; so
272: ;
273: ; iL(j) = floor( (dM j + beta) / dN ) + 1 (3)
274: ;
275: ; for integers j.
276: ;
277: ; Note if iR(j) is the line's rightmost pixel on row j, that
278: ; iR(j) = iL(j + 1) - 1.
279: ;
280: ; Similarly, rewriting equation (1) as a function of column i,
281: ; we can determine, given column i, on which pixel row j is the line
282: ; lit:
283: ;
284: ; dN i + [ dM (N0 + F/2) - dN M0 ] / F
285: ; j(i) = ceiling( ------------------------------------ ) - 1
286: ; dM
287: ;
288: ; Floors are easier to compute, so we can rewrite this:
289: ;
290: ; dN i + [ dM (N0 + F/2) - dN M0 ] / F + dM - 1/F
291: ; j(i) = floor( ----------------------------------------------- ) - 1
292: ; dM
293: ;
294: ; dN i + [ dM (N0 + F/2) - dN M0 ] / F + dM - 1/F - dM
295: ; = floor( ---------------------------------------------------- )
296: ; dM
297: ;
298: ; dN i + [ dM (N0 + F/2) - dN M0 - 1 ] / F
299: ; = floor( ---------------------------------------- )
300: ; dM
301: ;
302: ; We can once again wave our hands and throw away the fractional bits
303: ; of the remainder term:
304: ;
305: ; j(i) = floor( (dN i + gamma) / dM ) (4)
306: ;
307: ; where
308: ;
309: ; gamma = floor( [ dM (N0 + F/2) - dN M0 - 1 ] / F ) (5)
310: ;
311: ; We now note that
312: ;
313: ; beta = -gamma - 1 = ~gamma (6)
314: ;
315: ; To draw the pixels of the line, we could evaluate (3) on every scan
316: ; line to determine where the strip starts. Of course, we don't want
317: ; to do that because that would involve a multiply and divide for every
318: ; scan. So we do everything incrementally.
319: ;
320: ; We would like to easily compute c_j, the number of pixels on scan j:
321: ;
322: ;
323: ; c_j = iL(j + 1) - iL(j)
324: ;
325: ;
326: ; = floor((dM (j + 1) + beta) / dN) - floor((dM j + beta) / dN) (7)
327: ;
328: ; This may be rewritten as
329: ;
330: ; c_j = floor(i_{j+1} + r_{j+1} / dN) - floor(i_j + r_j / dN) (8)
331: ;
332: ;
333: ; where i_j, i_{j+1} are integers and r_j < dN, r_{j+1} < dN.
334: ;
335: ;
336: ; Rewriting (7) again:
337: ;
338: ; c_j = floor(i_j + r_j / dN + dM / dN) - floor(i_j + r_j / dN)
339: ;
340: ;
341: ;
342: ; = floor((r_j + dM) / dN) - floor(r_j / dN)
343: ;
344: ;
345: ; This may be rewritten as
346: ;
347: ; c_j = dI + floor((r_j + dR) / dN) - floor(r_j / dN)
348: ;
349: ;
350: ; where dI + dR / dN = dM / dN, dI is an integer and dR < dN.
351: ;
352: ; r_j is the remainder (or "error") term in the DDA loop: r_j / dN
353: ; is the exact fraction of a pixel at which the strip ends. To go
354: ; on to the next scan and compute c_{j+1} we need to know r_{j+1}.
355: ;
356: ;
357: ;
358: ; So in the main loop of the DDA:
359: ;
360: ; c_j = dI + floor((r_j + dR) / dN) and r_{j+1} = (r_j + dR) % dN
361: ;
362: ;
363: ; and we know r_j < dN, r_{j+1} < dN, and dR < dN.
364: ;
365: ;
366: ; We have derived the DDA only for lines in the first octant; to
367: ; handle other octants we do the common trick of flipping the line
368: ; to the first octant by first making the line left-to-right by
369: ; exchanging the end-points, then flipping about the lines y = 0 and
370: ; y = x, as necessary. We must record the transformation so we can
371: ; undo them later.
372: ;
373: ; We must also be careful of how the flips affect our rounding. If
374: ; to get the line to the first octant we flipped about y = 0, we now
375: ; have to be careful to round a y value of 1/2 up instead of down as
376: ; we would for a line originally in the first octant (recall that
377: ; "In the case where two pels are equidistant, the upper or left
378: ; pel is illuminated...").
379: ;
380: ; To account for this rounding when running the DDA, we shift the line
381: ; (or not) in the y direction by the smallest amount possible. That
382: ; takes care of rounding for the DDA, but we still have to be careful
383: ; about the rounding when determining the first and last pixels to be
384: ; lit in the line.
385: ;
386: ; Determining The First And Last Pixels In The Line
387: ; -------------------------------------------------
388: ;
389: ; Fractional coordinates also make it harder to determine which pixels
390: ; will be the first and last ones in the line. We've already taken
391: ; the fractional coordinates into account in calculating the DDA, but
392: ; the DDA cannot tell us which are the end pixels because it is quite
393: ; happy to calculate pixels on the line from minus infinity to positive
394: ; infinity.
395: ;
396: ; The diamond rule determines the start and end pixels. (Recall that
397: ; the sides are exclusive except for the left and top vertices.)
398: ; This convention can be thought of in another way: there are diamonds
399: ; around the pixels, and wherever the true line crosses a diamond,
400: ; that pel is illuminated.
401: ;
402: ; Consider a line where we've done the flips to the first octant, and the
403: ; floor of the start coordinates is the origin:
404: ;
405: ; +-----------------------> +x
406: ; |
407: ; | 0 1
408: ; | 0123456789abcdef
409: ; |
410: ; | 0 00000000?1111111
411: ; | 1 00000000 1111111
412: ; | 2 0000000 111111
413: ; | 3 000000 11111
414: ; | 4 00000 ** 1111
415: ; | 5 0000 ****1
416: ; | 6 000 1***
417: ; | 7 00 1 ****
418: ; | 8 ? ***
419: ; | 9 22 3 ****
420: ; | a 222 33 ***
421: ; | b 2222 333 ****
422: ; | c 22222 3333 **
423: ; | d 222222 33333
424: ; | e 2222222 333333
425: ; | f 22222222 3333333
426: ; |
427: ; | 2 3
428: ; v
429: ; +y
430: ;
431: ; If the start of the line lands on the diamond around pixel 0 (shown by
432: ; the '0' region here), pixel 0 is the first pel in the line. The same
433: ; is true for the other pels.
434: ;
435: ; A little more work has to be done if the line starts in the
436: ; 'nether-land' between the diamonds (as illustrated by the '*' line):
437: ; the first pel lit is the first diamond crossed by the line (pixel 1 in
438: ; our example). This calculation is determined by the DDA or slope of
439: ; the line.
440: ;
441: ; If the line starts exactly half way between two adjacent pixels
442: ; (denoted here by the '?' spots), the first pixel is determined by our
443: ; round-down convention (and is dependent on the flips done to
444: ; normalize the line).
445: ;
446: ; Last Pel Exclusive
447: ; ------------------
448: ;
449: ; To eliminate repeatedly lit pels between continuous connected lines,
450: ; we employ a last-pel exclusive convention: if the line ends exactly on
451: ; the diamond around a pel, that pel is not lit. (This eliminates the
452: ; checks we had in the old code to see if we were re-lighting pels.)
453: ;
454: ; The Half Flip
455: ; -------------
456: ;
457: ; To make our run length algorithm more efficient, we employ a "half
458: ; flip". If after normalizing to the first octant, the slope is more
459: ; than 1/2, we subtract the y coordinate from the x coordinate. This
460: ; has the effect of reflecting the coordinates through the line of slope
461: ; 1/2. Note that the diagonal gets mapped into the x-axis after a half
462: ; flip.
463: ;
464: ; How Many Bits Do We Need, Anyway?
465: ; ---------------------------------
466: ;
467: ; Note that if the line is visible on your screen, you must light up
468: ; exactly the correct pixels, no matter where in the 28.4 x 28.4 device
469: ; space the end points of the line lie (meaning you must handle 32 bit
470: ; DDAs, you can certainly have optimized cases for lesser DDAs).
471: ;
472: ; We move the origin to (floor(M0 / F), floor(N0 / F)), so when we
473: ; calculate gamma from (5), we know that 0 <= M0, N0 < F. And we
474: ; are in the first octant, so dM >= dN. Then we know that gamma can
475: ; be in the range [(-1/2)dM, (3/2)dM]. The DDI guarantees us that
476: ; valid lines will have dM and dN values at most 31 bits (unsigned)
477: ; of significance. So gamma requires 33 bits of significance (we store
478: ; this as a 64 bit number for convenience).
479: ;
480: ; When running through the DDA loop, r_j + dR can have a value in the
481: ; range 0 <= r_j + dR < 2 dN; thus the result must be a 32 bit unsigned
482: ; value.
483: ;
484: ;
485: ; Testing Lines
486: ; -------------
487: ;
488: ; To be NT compliant, a display driver must exactly adhere to GIQ,
489: ; which means that for any given line, the driver must light exactly
490: ; the same pels as does GDI. This can be tested using the Guiman tool
491: ; provided elsewhere in the DDK, and 'ZTest', which draws random lines
492: ; on the screen and to a bitmap, and compares the results.
493: ;
494: ; If You've Got Line Hardware
495: ; ---------------------------
496: ;
497: ; If your hardware already adheres to GIQ, you're all set. Otherwise
498: ; you'll want to look at the S3 sample code and read the following:
499: ;
500: ; 1) You'll want to special case integer-only lines, since they require
501: ; less processing time and are more common (CAD programs will probably
502: ; only ever give integer lines). GDI does not provide a flag saying
503: ; that all lines in a path are integer lines; consequently, you will
504: ; have to explicitly check every line.
505: ;
506: ; 2) You are required to correctly draw any line in the 28.4 device
507: ; space that intersects the viewport. If you have less than 32 bits
508: ; of significance in the hardware for the Bresenham terms, extremely
509: ; long lines would overflow the hardware. For such (rare) cases, you
510: ; can fall back to strip-drawing code, of which there is a C version in
511: ; the S3's lines.cxx (or if your display is a frame buffer, fall back
512: ; to the engine).
513: ;
514: ; 3) If you can explicitly set the Bresenham terms in your hardware, you
515: ; can draw non-integer lines using the hardware. If your hardware has
516: ; 'n' bits of precision, you can draw GIQ lines that are up to 2^(n-5)
517: ; pels long (4 bits are required for the fractional part, and one bit is
518: ; used as a sign bit). Note that integer lines don't require the 4
519: ; fractional bits, so if you special case them as in 1), you can do
520: ; integer lines that are up to 2^(n - 1) pels long. See the S3's
521: ; fastline.asm for an example.
522: ;
523: ;-----------------------------------------------------------------------;
524:
525: cProc bLines,36,< \
526: uses esi edi ebx, \
527: ppdev: ptr, \
528: pptfxFirst: ptr, \
529: pptfxBuf: ptr, \
530: prun: ptr, \
531: cptfx: dword, \
532: pls: ptr, \
533: prclClip: ptr, \
534: apfn: ptr, \
535: flStart: dword >
536:
537: ; ppdev: Surface data
538: ; pptfxFirst: Start point of first line
539: ; pptfxBuf: All subsequent points
540: ; prun: Array of runs if doing complex clipping
541: ; cptfx: Number of points in pptfxBuf (i.e., # lines)
542: ; pls: Line state
543: ; prclClip: Clip rectangle if doing simple clipping
544: ; apfn: Pointer to table of strip drawers
545: ; flStart: Flags for all lines
546:
547: local cPelsAfterThisBank: dword ; For bank switching
548: local cStripsInNextRun: dword ; For bank switching
549: local pptfxBufEnd: ptr ; Last point in pptfxBuf
550: local M0: dword ; Normalized x0 in device coords
551: local dM: dword ; Delta-x in device coords
552: local N0: dword ; Normalized y0 in device coords
553: local dN: dword ; Delta-y in device coords
554: local fl: dword ; Flags for current line
555: local x: dword ; Normalized start pixel x-coord
556: local y: dword ; Normalized start pixel y-coord
557: local eqGamma_lo: dword ; Lower 32 bits of Gamma
558: local eqGamma_hi: dword ; Upper 32 bits of Gamma
559: local x0: dword ; Start pixel x-offset
560: local y0: dword ; Start pixel y-offset
561: local ulSlopeOneAdjustment: dword ; Special offset if line of slope 1
562: local cStylePels: dword ; # of pixels in line (before clip)
563: local xStart: dword ; Start pixel x-offset before clip
564: local pfn: ptr ; Pointer to strip drawing function
565: local cPels: dword ; # pixels to be drawn (after clip)
566: local i: dword ; # pixels in strip
567: local r: dword ; Remainder (or "error") term
568: local d_I: dword ; Delta-I
569: local d_R: dword ; Delta-R
570: local plStripEnd: ptr ; Last strip in buffer
571: local ptlStart[size POINTL]: byte ; Unnormalized start coord
572: local dN_Original: dword ; dN before half-flip
573: local xClipLeft: dword ; Left side of clip rectangle
574: local xClipRight: dword ; Right side of clip rectangle
575: local strip[size STRIPS]: byte ; Our strip buffer
576:
577: ; Do some initializing:
578:
579: mov esi, pls
580: mov ecx, cptfx
581: mov edx, pptfxBuf
582: lea eax, [edx + ecx * (size POINTL) - (size POINTL)]
583: mov pptfxBufEnd, eax ; pptfxBufEnd is inclusive of end point
584:
585: mov eax, [esi].LS_chAndXor ; copy chAndXor from LINESTATE to STRIPS
586: mov strip.ST_chAndXor, eax ; buffer
587:
588: mov eax, [edx].ptl_x ; Load up end point (M1, N1)
589: mov edi, [edx].ptl_y
590:
591: mov edx, pptfxFirst ; Load up start point (M0, N0)
592: mov esi, [edx].ptl_x
593: mov ecx, [edx].ptl_y
594:
595: mov ebx, flStart
596:
597: ;-----------------------------------------------------------------------;
598: ; Flip to the first octant. ;
599: ;-----------------------------------------------------------------------;
600:
601: ; Register state: esi = M0
602: ; ecx = N0
603: ; eax = dM (M1)
604: ; edi = dN (N1)
605: ; ebx = fl
606:
607: ; Make sure we go left to right:
608:
609: public the_main_loop
610: the_main_loop:
611: cmp esi, eax
612: jle short is_left_to_right ; skip if M0 <= M1
613: xchg esi, eax ; swap M0, M1
614: xchg ecx, edi ; swap N0, N1
615: or ebx, FL_FLIP_H
616:
617: is_left_to_right:
618:
619: ; Compute the deltas, remembering that the DDI says we should get
620: ; deltas less than 2^31. If we get more, we ensure we don't crash
621: ; later on by simply skipping the line:
622:
623: sub eax, esi ; eax = dM
624: jo next_line ; dM must be less than 2^31
625: sub edi, ecx ; edi = dN
626: jo next_line ; dN must be less than 2^31
627:
628: jge short is_top_to_bottom ; skip if dN >= 0
629: neg ecx ; N0 = -N0
630: neg edi ; N1 = -N1
631: or ebx, FL_FLIP_V
632:
633: is_top_to_bottom:
634: cmp edi, eax
635: jb short done_flips ; skip if dN < dM
636: jne short slope_more_than_one
637:
638: ; We must special case slopes of one (because of our rounding convention):
639:
640: or ebx, FL_FLIP_SLOPE_ONE
641: jmp short done_flips
642:
643: slope_more_than_one:
644: xchg eax, edi ; swap dM, dN
645: xchg esi, ecx ; swap M0, N0
646: or ebx, FL_FLIP_D
647:
648: done_flips:
649:
650: mov edx, ebx
651: and edx, FL_ROUND_MASK
652: .errnz FL_ROUND_SHIFT - 2
653: or ebx, [gaflRoundTable + edx] ; get our rounding flags
654:
655: mov dM, eax ; save some info
656: mov dN, edi
657: mov fl, ebx
658:
659: ; We're going to shift our origin so that it's at the closest integer
660: ; coordinate to the left/above our fractional start point (it makes
661: ; the math quicker):
662:
663: mov edx, esi ; x = LFLOOR(M0)
664: sar edx, FLOG2
665: mov x, edx
666:
667: mov edx, ecx ; y = LFLOOR(N0)
668: sar edx, FLOG2
669: mov y, edx
670:
671: ;-----------------------------------------------------------------------;
672: ; Compute the fractional remainder term ;
673: ;-----------------------------------------------------------------------;
674:
675: ; By shifting the origin we've contrived to eliminate the integer
676: ; portion of our fractional start point, giving us start point
677: ; fractional coordinates in the range [0, F - 1]:
678:
679: and esi, F - 1 ; M0 = FXFRAC(M0)
680: and ecx, F - 1 ; N0 = FXFRAC(N0)
681:
682: ; We now compute Gamma:
683:
684: mov M0, esi ; save M0, N0 for later
685: mov N0, ecx
686:
687: lea edx, [ecx + F/2]
688: mul edx ; [edx:eax] = dM * (N0 + F/2)
689: xchg eax, edi
690: mov ecx, edx ; [ecx:edi] = dM * (N0 + F/2)
691: ; (we just nuked N0)
692:
693: mul esi ; [edx:eax] = dN * M0
694:
695: ; Now gamma = dM * (N0 + F/2) - dN * M0 - bRoundDown
696:
697: .errnz FL_V_ROUND_DOWN - 8000h
698: ror bh, 8
699: sbb edi, eax
700: sbb ecx, edx
701:
702: shrd edi, ecx, FLOG2
703: sar ecx, FLOG2 ; gamma = [ecx:edi] >>= 4
704:
705: mov eqGamma_hi, ecx
706: mov eqGamma_lo, edi
707:
708: mov eax, N0
709:
710: ; Register state:
711: ; eax = N0
712: ; ebx = fl
713: ; ecx = eqGamma_hi
714: ; edx = garbage
715: ; esi = M0
716: ; edi = eqGamma_lo
717:
718: testb ebx, FL_FLIP_H
719: jnz line_runs_right_to_left
720:
721: ;-----------------------------------------------------------------------;
722: ; Figure out which pixels are at the ends of a left-to-right line. ;
723: ; --------> ;
724: ;-----------------------------------------------------------------------;
725:
726: public line_runs_left_to_right
727: line_runs_left_to_right:
728: or esi, esi
729: jz short LtoR_check_slope_one
730: ; skip ahead if M0 == 0
731: ; (in that case, x0 = 0 which is to be
732: ; kept in esi, and is already
733: ; conveniently zero)
734:
735: or eax, eax
736: jnz short LtoR_N0_not_zero
737:
738: .errnz FL_H_ROUND_DOWN - 80h
739: ror bl, 8
740: sbb esi, -F/2
741: shr esi, FLOG2
742: jmp short LtoR_check_slope_one
743: ; esi = x0 = rounded M0
744:
745: LtoR_N0_not_zero:
746: sub eax, F/2
747: sbb edx, edx
748: xor eax, edx
749: sub eax, edx
750: cmp esi, eax
751: sbb esi, esi
752: inc esi ; esi = x0 = (abs(N0 - F/2) <= M0)
753:
754: public LtoR_check_slope_one
755: LtoR_check_slope_one:
756: mov ulSlopeOneAdjustment, 0
757: mov eax, ebx
758: and eax, FL_FLIP_SLOPE_ONE + FL_H_ROUND_DOWN
759: cmp eax, FL_FLIP_SLOPE_ONE + FL_H_ROUND_DOWN
760: jne short LtoR_compute_y0_from_x0
761:
762: ; We have to special case lines that are exactly of slope 1 or -1:
763:
764: mov eax, N0
765: add eax, dN
766: and eax, F - 1 ; eax = N1
767: jz short LtoR_slope_one_check_start_point
768:
769: mov edx, M0
770: add edx, dM
771: and edx, F - 1 ; edx = M1
772:
773: add eax, F/2
774: cmp edx, eax ; cmp M1, N1 + F/2
775: jne short LtoR_slope_one_check_start_point
776: mov ulSlopeOneAdjustment, -1
777:
778: LtoR_slope_one_check_start_point:
779: mov eax, M0
780: or eax, eax
781: jz short LtoR_compute_y0_from_x0
782:
783: add eax, F/2
784: cmp eax, N0 ; cmp M0 + 8, N0
785: jne short LtoR_compute_y0_from_x0
786:
787: xor esi, esi ; x0 = 0
788:
789: LtoR_compute_y0_from_x0:
790: ; Store x0 and derive y0 (stored as 0 or 1) from eqGamma, dN and dM.
791: ; ecx = eqGamma_hi
792: ; esi = x0
793: ; edi = eqGamma_lo
794:
795: mov eax, dN
796: mov edx, dM
797:
798: mov x0, esi
799: mov y0, 0
800: cmp ecx, 0
801: jl short LtoR_compute_x1 ; eqGamma negative (signed 64-bit): y0 stays 0
802:
803: neg esi ; x0 is 0 or 1 here, so esi = 0 or -1
804: and esi, eax ; esi = dN * x0
805: sub edx, esi ; edx = dM - dN * x0
806: cmp edi, edx
807: mov edx, dM ; restore edx = dM (mov leaves the cmp flags intact)
808: jb short LtoR_compute_x1 ; must be unsigned: eqGamma_lo is a 32 bit unsigned magnitude
809: mov y0, 1 ; y0 = floor((dN * x0 + eqGamma) / dM)
810:
811: LtoR_compute_x1:
812:
813: ; Register state:
814: ; eax = dN
815: ; ebx = fl
816: ; ecx = garbage
817: ; edx = dM
818: ; esi = garbage
819: ; edi = garbage
820:
821: mov esi, M0
822: add esi, edx
823: mov ecx, esi
824: shr esi, FLOG2
825: dec esi ; x1 = ((M0 + dM) >> 4) - 1
826: add esi, ulSlopeOneAdjustment
827: and ecx, F-1 ; M1 = (M0 + dM) & 15
828: jz done_first_pel_last_pel
829:
830: add eax, N0
831: and eax, F-1 ; N1 = (N0 + dN) & 15
832: jnz short LtoR_N1_not_zero
833:
834: .errnz FL_H_ROUND_DOWN - 80h
835: ror bl, 8
836: sbb ecx, -F/2
837: shr ecx, FLOG2 ; ecx = LROUND(M1, fl & FL_ROUND_DOWN)
838: add esi, ecx
839: jmp done_first_pel_last_pel
840:
841: LtoR_N1_not_zero:
842: sub eax, F/2
843: sbb edx, edx
844: xor eax, edx
845: sub eax, edx
846: cmp eax, ecx
847: jg done_first_pel_last_pel
848: inc esi
849: jmp done_first_pel_last_pel
850:
851: ;-----------------------------------------------------------------------;
852: ; Figure out which pixels are at the ends of a right-to-left line. ;
853: ; <-------- ;
854: ;-----------------------------------------------------------------------;
855:
856: ; Compute x0:
857:
858: public line_runs_right_to_left
859: line_runs_right_to_left:
860: mov x0, 1 ; x0 = 1
861: or eax, eax
862: jnz short RtoL_N0_not_zero
863:
864: xor edx, edx ; ulDelta = 0
865: .errnz FL_H_ROUND_DOWN - 80h
866: ror bl, 8
867: sbb esi, -F/2
868: shr esi, FLOG2 ; esi = LROUND(M0, fl & FL_H_ROUND_DOWN)
869: jz short RtoL_check_slope_one
870:
871: mov x0, 2
872: mov edx, dN
873: jmp short RtoL_check_slope_one
874:
875: RtoL_N0_not_zero:
876: sub eax, F/2
877: sbb edx, edx
878: xor eax, edx
879: sub eax, edx
880: add eax, esi ; eax = ABS(N0 - F/2) + M0
881: xor edx, edx ; ulDelta = 0
882: cmp eax, F
883: jle short RtoL_check_slope_one
884:
885: mov x0, 2 ; x0 = 2
886: mov edx, dN ; ulDelta = dN
887:
888: public RtoL_check_slope_one
889: RtoL_check_slope_one:
890: mov ulSlopeOneAdjustment, 0
891: mov eax, ebx
892: and eax, FL_FLIP_SLOPE_ONE + FL_H_ROUND_DOWN
893: cmp eax, FL_FLIP_SLOPE_ONE
894: jne short RtoL_compute_y0_from_x0
895:
896: ; We have to special case lines that are exactly of slope 1 or -1:
897:
898: mov eax, N0
899: add eax, dN
900: and eax, F - 1 ; eax = N1
901: jz short RtoL_slope_one_check_start_point
902:
903: mov esi, M0
904: add esi, dM
905: and esi, F - 1 ; esi = M1
906:
907: add eax, F/2
908: cmp esi, eax ; cmp M1, N1 + F/2
909: jne short RtoL_slope_one_check_start_point
910: mov ulSlopeOneAdjustment, 1
911:
912: RtoL_slope_one_check_start_point:
913: mov eax, M0
914: or eax, eax
915: jz short RtoL_compute_y0_from_x0
916:
917: add eax, F/2
918: cmp eax, N0 ; cmp M0 + 8, N0
919: jne short RtoL_compute_y0_from_x0
920:
921: mov x0, 2 ; x0 = 2
922: mov edx, dN ; ulDelta = dN
923:
924: RtoL_compute_y0_from_x0:
925: ; Derive y0 (stored as 0, 1 or 2) from eqGamma + ulDelta, dN and dM.
926: ; eax = garbage
927: ; ebx = fl
928: ; ecx = eqGamma_hi
929: ; edx = ulDelta
930: ; esi = garbage
931: ; edi = eqGamma_lo
932:
933: mov eax, dN ; eax = dN
934: mov y0, 0 ; y0 = 0
935:
936: add edi, edx
937: adc ecx, 0 ; eqGamma += ulDelta
938: ; NOTE: Setting flags here!
939: mov edx, dM ; edx = dM
940: jl short RtoL_compute_x1 ; NOTE: Looking at the flags here!
941: jg short RtoL_y0_is_2 ; high dword > 0: sum is at least 2^32, above any 32 bit bound
942: ; High dword is zero from here on, so edi alone is the (unsigned) sum.
943: lea ecx, [edx + edx]
944: sub ecx, eax ; ecx = 2 * dM - dN
945: cmp edi, ecx
946: jae short RtoL_y0_is_2 ; must be unsigned: both sides can exceed 2^31
947:
948: sub ecx, edx ; ecx = dM - dN
949: cmp edi, ecx
950: jb short RtoL_compute_x1 ; must be unsigned: edi can exceed 2^31
951:
952: mov y0, 1
953: jmp short RtoL_compute_x1
954:
955: RtoL_y0_is_2:
956: mov y0, 2
957:
958: RtoL_compute_x1: ; Compute the last pel x1 (left in esi) from M0 + dM and N0 + dN
959:
960: ; Register state:
961: ; eax = dN
962: ; ebx = fl
963: ; ecx = garbage
964: ; edx = dM
965: ; esi = garbage
966: ; edi = garbage
967:
968: mov esi, M0
969: add esi, edx ; esi = M0 + dM
970: mov ecx, esi
971: shr esi, FLOG2 ; x1 = (M0 + dM) >> 4
972: add esi, ulSlopeOneAdjustment ; 0 or 1, set in RtoL_check_slope_one
973: and ecx, F-1 ; M1 = (M0 + dM) & 15
974:
975: add eax, N0
976: and eax, F-1 ; N1 = (N0 + dN) & 15
977: jnz short RtoL_N1_not_zero
978:
979: .errnz FL_H_ROUND_DOWN - 80h ; the carry trick below needs the flag in bit 7 of bl
980: ror bl, 8 ; bl is unchanged; carry = bit 7 = FL_H_ROUND_DOWN
981: sbb ecx, -F/2 ; ecx = M1 + F/2 - carry
982: shr ecx, FLOG2 ; ecx = LROUND(M1, fl & FL_H_ROUND_DOWN)
983: add esi, ecx ; x1 += rounded M1
984: jmp done_first_pel_last_pel
985:
986: RtoL_N1_not_zero:
987: sub eax, F/2 ; carry set on borrow (N1 was below F/2)
988: sbb edx, edx ; edx = -1 on borrow, else 0
989: xor eax, edx
990: sub eax, edx ; conditional negate: eax = ABS(N1 - F/2)
991: add eax, ecx ; eax = ABS(N1 - F/2) + M1
992: cmp eax, F+1 ; carry set when eax <= F
993: sbb esi, -1 ; x1 += 1 - carry, i.e. x1++ only when eax > F
994:
995: done_first_pel_last_pel: ; x0 and x1 known: reject empty lines, record the length, check for clipping
996:
997: ; Register state:
998: ; eax = garbage
999: ; ebx = fl
1000: ; ecx = garbage
1001: ; edx = garbage
1002: ; esi = x1
1003: ; edi = garbage
1004:
1005: mov ecx, x0
1006: lea edx, [esi + 1]
1007: sub edx, ecx ; edx = x1 - x0 + 1
1008:
1009: jle next_line ; nothing to draw when x1 - x0 + 1 <= 0
1010: mov cStylePels, edx ; pel count before any clipping
1011: mov xStart, ecx ; x0 before any clipping (clipping rewrites x0 only)
1012:
1013: ;-----------------------------------------------------------------------;
1014: ; See if clipping or styling needs to be done. ;
1015: ;-----------------------------------------------------------------------;
1016:
1017: testb ebx, FL_CLIP
1018: jnz do_some_clipping ; comes back at done_clipping unless totally clipped
1019:
1020: ; Register state:
1021: ; eax = garbage
1022: ; ebx = fl
1023: ; ecx = x0 (stack variable correct too)
1024: ; edx = garbage
1025: ; esi = x1
1026: ; edi = garbage
1027:
1028: done_clipping: ; Compute cPels, translate the start pel to screen space, undo the D and V flips
1029: mov eax, y0
1030:
1031: sub esi, ecx
1032: inc esi ; esi = cPels = x1 - x0 + 1
1033: mov cPels, esi
1034:
1035: mov esi, ppdev
1036: add ecx, x ; ecx = ptlStart.ptl_x
1037: add eax, y ; eax = ptlStart.ptl_y
1038:
1039: mov esi, [esi].pdev_lNextScan ; we'll compute the sign of lNextScan
1040:
1041: testb ebx, FL_FLIP_D
1042: jz short do_v_unflip
1043: xchg ecx, eax ; undo the D flip: swap x and y
1044:
1045: do_v_unflip:
1046: testb ebx, FL_FLIP_V
1047: jz short done_unflips
1048: neg eax ; undo the V flip: negate y
1049: neg esi ; and negate the scan stride to match
1050:
1051: done_unflips:
1052: mov strip.ST_lNextScan, esi ; lNextScan now right for y-direction
1053: testb ebx, FL_STYLED
1054: jnz do_some_styling ; preserves eax, ebx, ecx and exits to done_styling
1055:
1056: done_styling:
1057: lea edx, [strip.ST_alStrips + (STRIP_MAX * 4)] ; address just past STRIP_MAX dword entries
1058: mov plStripEnd, edx
1059:
1060: mov cPelsAfterThisBank, 0 ; defaults; bank setup below may overwrite both
1061: mov cStripsInNextRun, 7fffffffh
1062:
1063: ;-----------------------------------------------------------------------;
1064: ; Do banking setup. ;
1065: ;-----------------------------------------------------------------------;
1066:
1067: public bank_setup
1068: bank_setup: ; Call the bank-control routine if the start scan is outside rcl1WindowClip
1069:
1070: ; Register state:
1071: ; eax = ptlStart.ptl_y
1072: ; ebx = fl
1073: ; ecx = ptlStart.ptl_x
1074: ; edx = garbage
1075: ; esi = garbage
1076: ; edi = garbage
1077:
1078: mov esi, ppdev
1079: cmp eax, [esi].pdev_rcl1WindowClip.yTop
1080: jl short bank_get_initial_bank ; ptlStart.y < rcl1WindowClip.yTop
1081:
1082: cmp eax, [esi].pdev_rcl1WindowClip.yBottom
1083: jl short bank_got_initial_bank ; ptlStart.y < rcl1WindowClip.yBottom: no call needed
1084:
1085: bank_get_initial_bank:
1086: mov ptlStart.ptl_y, eax ; Save ptlStart.ptl_y
1087: mov edi, ecx ; Save ptlStart.ptl_x
1088:
1089: .errnz JustifyTop
1090: .errnz JustifyBottom - 1
1091: .errnz FL_FLIP_V - 8
1092:
1093: mov ecx, ebx ; JustifyTop if line goes down,
1094: shr ecx, 3 ; JustifyBottom if line goes up
1095: and ecx, 1 ; ecx = FL_FLIP_V bit moved down to bit 0
1096:
1097: bank_justified:
1098: ptrCall <dword ptr [esi].pdev_pfnBankControl>, \
1099: <esi, eax, ecx>
1100:
1101: mov eax, ptlStart.ptl_y ; reload eax and ecx after the call
1102: mov ecx, edi
1103:
1104: bank_got_initial_bank: ; D-flipped (y-major) lines: split cPels between this bank and later ones
1105: testb ebx, FL_FLIP_D
1106: jz short bank_major_x ; no D flip: handled by bank_major_x
1107:
1108: bank_major_y:
1109: testb ebx, FL_FLIP_V
1110: jz short bank_major_y_down
1111: bank_major_y_up:
1112: lea edi, [eax + 1]
1113: sub edi, [esi].pdev_rcl1WindowClip.yTop ; edi = y - yTop + 1
1114: jmp short bank_done_y_major
1115: bank_major_y_down:
1116: mov edi, [esi].pdev_rcl1WindowClip.yBottom
1117: sub edi, eax ; edi = yBottom - y
1118: bank_done_y_major:
1119: mov esi, cPels
1120: sub esi, edi ; edi = cPelsInBank, esi = cPels - cPelsInBank
1121: mov cPelsAfterThisBank, esi
1122: jle short done_bank_setup ; cPels <= cPelsInBank: leave cPels alone
1123: mov cPels, edi ; otherwise limit cPels to cPelsInBank
1124: jmp short done_bank_setup
1125:
1126: bank_major_x: ; x-major lines: set cStripsInNextRun / plStripEnd, maybe forbid the half flip
1127: mov edi, dN
1128: shr edi, FLOG2
1129: add edi, y
1130:
1131: ; We're guessing at the y-position of the end pixel (it's too much work
1132: ; to compute the actual value) to see if the line spans more than one
1133: ; bank. We have to add at least a slop value of '3' because the actual
1134: ; start pixel may be up to 2 off from 'y' because of end-pixel exclusiveness,
1135: ; and we have to add 1 more because we're taking the floor of (dN / F), to
1136: ; account for rounding:
1137:
1138: add edi, 3 ; yEnd = edi = y + LFLOOR(dN) + 3
1139: testb ebx, FL_FLIP_V
1140: jz short bank_major_x_down
1141: bank_major_x_up:
1142: mov edx, 1
1143: sub edx, [esi].pdev_rcl1WindowClip.yTop ; edx = -yNextBankStart
1144:
1145: cmp edi, edx
1146: lea edx, [edx + eax] ; edx = cStripsInNextRun (lea leaves the cmp flags alone)
1147: jl short bank_major_x_done ; yEnd < -yNextBankStart
1148:
1149: ; Line may go over bank boundary, so don't do a half flip:
1150:
1151: or ebx, FL_DONT_DO_HALF_FLIP
1152: jmp short bank_major_x_done
1153:
1154: bank_major_x_down:
1155: mov esi, [esi].pdev_rcl1WindowClip.yBottom ; esi = yNextBankStart
1156:
1157: mov edx, esi
1158: sub edx, eax ; edx = cStripsInNextRun
1159:
1160: cmp edi, esi
1161: jl short bank_major_x_done ; yEnd < yNextBankStart
1162: or ebx, FL_DONT_DO_HALF_FLIP
1163:
1164: bank_major_x_done:
1165: sub edx, STRIP_MAX
1166: mov cStripsInNextRun, edx ; strips left after one full buffer
1167: jge short done_bank_setup ; at least STRIP_MAX strips: keep the default plStripEnd
1168:
1169: lea edx, [strip.ST_alStrips + edx * 4 + (STRIP_MAX * 4)] ; edx < 0 here: end the buffer early
1170: mov plStripEnd, edx
1171:
1172: done_bank_setup: ; Compute pjScreen, load the DDA terms and apply the half flip if wanted
1173:
1174: ;-----------------------------------------------------------------------;
1175: ; Setup to do DDA. ;
1176: ;-----------------------------------------------------------------------;
1177:
1178: ; Register state:
1179: ; eax = ptlStart.ptl_y
1180: ; ebx = fl
1181: ; ecx = ptlStart.ptl_x
1182: ; edx = garbage
1183: ; esi = garbage
1184: ; edi = garbage
1185:
1186: mov esi, ppdev
1187: mov edi, eax ; Now edi = ptlStart.ptl_y
1188: imul [esi].pdev_lNextScan ; one-operand imul: result in edx:eax, only eax is used
1189: add eax, [esi].pdev_pvBitmapStart
1190: add eax, ecx
1191: mov strip.ST_pjScreen, eax ; pjScreen = pchBits + ptlStart.y *
1192: ; cjDelta + ptlStart.x
1193:
1194: mov eax, dM
1195: mov ecx, dN
1196: mov esi, eqGamma_lo
1197: mov edi, eqGamma_hi
1198:
1199: ; Register state:
1200: ; eax = dM
1201: ; ebx = fl
1202: ; ecx = dN
1203: ; edx = garbage
1204: ; esi = eqGamma_lo
1205: ; edi = eqGamma_hi
1206:
1207: lea edx, [ecx + ecx] ; if (2 * dN > dM)
1208: cmp edx, eax
1209: mov edx, y0 ; Load y0 again (mov leaves the cmp flags alone)
1210: jbe short after_half_flip ; 2 * dN <= dM (unsigned): no half flip
1211:
1212: test ebx, FL_DONT_DO_HALF_FLIP
1213: jnz short after_half_flip ; half flip was forbidden during bank setup
1214:
1215: or ebx, FL_FLIP_HALF
1216: mov fl, ebx
1217:
1218: ; Do a half flip!
1219:
1220: not esi
1221: not edi ; edi:esi = -eqGamma - 1
1222: add esi, eax
1223: adc edi, 0 ; eqGamma = -eqGamma - 1 + dM
1224:
1225: neg ecx
1226: add ecx, eax ; dN = dM - dN
1227:
1228: neg edx
1229: add edx, x0 ; y0 = x0 - y0
1230:
1231: after_half_flip: ; Record the flips and look up the strip routine in apfn
1232: mov strip.ST_flFlips, ebx
1233: and ebx, FL_STRIP_MASK ; table index
1234:
1235: .errnz FL_STRIP_SHIFT ; the masked bits must already start at bit 0
1236: mov eax, apfn
1237: lea eax, [eax + ebx * 4]
1238: mov eax, [eax] ; eax = dword at apfn + 4 * (fl AND FL_STRIP_MASK)
1239: mov pfn, eax
1240: mov eax, dM ; reload dM; eax was used as scratch above
1241:
1242: ; Register state:
1243: ; eax = dM
1244: ; ebx = garbage
1245: ; ecx = dN
1246: ; edx = y0
1247: ; esi = eqGamma_lo
1248: ; edi = eqGamma_hi
1249:
1250: or ecx, ecx
1251: jz short zero_slope ; dN == 0: skip the divides by dN
1252:
1253: compute_dda_stuff: ; Compute first strip length i, its remainder r, and the per-strip d_I / d_R
1254: inc edx ; edx = y0 + 1
1255: mul edx ; edx:eax = dM * (y0 + 1)
1256: stc ; set the carry to accomplish -1
1257: sbb eax, esi
1258: sbb edx, edi ; (y0 + 1) * dM - eqGamma - 1
1259: div ecx ; divide by dN: eax = quotient, edx = remainder
1260:
1261: mov esi, eax ; esi = i
1262: mov edi, edx ; edi = r
1263:
1264: xor edx, edx ; zero the high dword before the divide
1265: mov eax, dM
1266: div ecx ; edx = d_R, eax = d_I
1267: mov d_I, eax
1268:
1269: sub esi, x0
1270: inc esi ; i = quotient - x0 + 1
1271:
1272: done_dda_stuff: ; Load the loop registers listed under "Register state" below
1273: lea eax, [strip.ST_alStrips] ; eax = start of the strip array
1274: mov ebx, cPels
1275:
1276: ;-----------------------------------------------------------------------;
1277: ; Do our main DDA loop. ;
1278: ;-----------------------------------------------------------------------;
1279:
1280: sub edi, ecx ; offset remainder term from [0..dN)
1281: ; to [-dN..0) so test in inner
1282: ; loop is quicker
1283: align 4
1284:
1285: ; Register state:
1286: ; eax = plStrip ; current pointer into strip array
1287: ; ebx = cPels ; total number of pels in line
1288: ; ecx = dN ; delta-N = rise in line
1289: ; edx = d_R ; d_I + d_R/dN = exact strip length
1290: ; esi = i ; length of current strip
1291: ; edi = r ; remainder term for current strip
1292: ; ; in range [-dN..0)
1293:
1294: public dda_loop
1295: dda_loop: ; Emit one strip length per pass (unrolled twice) until the line is used up
1296: sub ebx, esi ; subtract strip length from line length
1297: jle final_strip ; if zero or negative, this strip ends the line
1298:
1299: mov [eax], esi ; write strip length to strip array
1300: add eax, 4 ; adjust strip pointer
1301: cmp plStripEnd, eax ; is the strip array buffer full?
1302: jbe short output_strips ; if so, empty it
1303:
1304: ; The output_strips routine jumps to here when done:
1305:
1306: done_output_strips:
1307: mov esi, d_I ; our normal strip length
1308: add edi, edx ; adjust our remainder term
1309: jl short dda_loop ; still negative: strip length stays d_I
1310:
1311: sub edi, ecx ; our remainder became 1 or more, so
1312: inc esi ; we increment this strip length
1313: ; and adjust the remainder term
1314:
1315: ; We've unrolled our loop a bit, so this should look familiar to the above:
1316:
1317: sub ebx, esi ; subtract strip length from line length
1318: jle final_strip ; if zero or negative, this strip ends the line
1319:
1320: mov [eax], esi ; write strip length to strip array
1321: add eax, 4 ; adjust strip pointer
1322:
1323: ; Note that banking requires us to check if the strip array is full here
1324: ; too (and note that if output_strips is called it will return to
1325: ; done_output_strips):
1326:
1327: cmp plStripEnd, eax
1328: jbe short output_strips
1329:
1330: mov esi, d_I ; our normal strip length
1331: add edi, edx ; adjust our remainder term
1332: jl short dda_loop ; still negative: strip length stays d_I
1333:
1334: sub edi, ecx ; our remainder became 1 or more, so
1335: inc esi ; adjust
1336: jmp short dda_loop
1337:
1338: zero_slope: ; Reached when dN == 0: skip the divides in compute_dda_stuff
1339: mov esi, 7fffffffh ; strip length = largest positive value, so dda_loop exits at its first jle
1340: jmp short done_dda_stuff
1341:
1342: ;-----------------------------------------------------------------------;
1343: ; Empty strips buffer & possibly do x-major bank switch. ;
1344: ;-----------------------------------------------------------------------;
1345:
1346: output_strips: ; Spill the DDA registers, draw the buffered strips, then see if a bank switch is due
1347: mov d_R, edx ; save loop state; it is reloaded at get_ready_for_more_strips
1348: mov cPels, ebx
1349: mov i, esi
1350: mov r, edi
1351: mov dN, ecx
1352:
1353: lea edx, [strip]
1354: mov ecx, pls
1355:
1356: ; Call our strip routine:
1357:
1358: ptrCall <dword ptr pfn>, \
1359: <edx, ecx, eax>
1360:
1361: ; It may be that we ran out of room in our strips buffer, and don't
1362: ; actually have to switch banks. See if that's the case:
1363:
1364: mov eax, cStripsInNextRun
1365: or eax, eax
1366: jg short done_strip_bank_switch ; count still positive: no bank switch
1367:
1368: ; We have to switch banks. See if we're going up or down:
1369:
1370: mov esi, ppdev
1371: test fl, FL_FLIP_V
1372: jz short bank_x_down
1373:
1374: bank_x_up: ; x-major line going up: map in the bank holding scan yTop - 1
1375: mov edi, strip.ST_pjScreen
1376: sub edi, [esi].pdev_pvBitmapStart ; edi = pjScreen as an offset from pvBitmapStart
1377: mov ebx, [esi].pdev_rcl1WindowClip.yTop
1378: dec ebx ; we want yTop - 1 to be mapped in
1379:
1380: ; Map in the next higher bank:
1381:
1382: ptrCall <dword ptr [esi].pdev_pfnBankControl>, \
1383: <esi, ebx, JustifyBottom>; ebx, esi and edi are preserved
1384:
1385: lea eax, [ebx + 1] ; eax = yTop as it was before the call
1386: sub eax, [esi].pdev_rcl1WindowClip.yTop ; minus yTop as read after the call
1387: ; eax = # of scans can do in bank
1388:
1389: add edi, [esi].pdev_pvBitmapStart ; rebuild the pointer from pvBitmapStart as read after the call
1390: mov strip.ST_pjScreen, edi
1391:
1392: jmp short done_strip_bank_switch
1393:
1394: bank_x_down: ; x-major line going down: map in the bank holding scan yBottom
1395: mov edi, strip.ST_pjScreen
1396: sub edi, [esi].pdev_pvBitmapStart ; edi = pjScreen as an offset from pvBitmapStart
1397: mov ebx, [esi].pdev_rcl1WindowClip.yBottom ; ebx = yBottom before the call
1398:
1399: ; Map in the next lower bank:
1400:
1401: ptrCall <dword ptr [esi].pdev_pfnBankControl>, \
1402: <esi, ebx, JustifyTop> ; ebx, esi and edi are preserved
1403:
1404: mov eax, [esi].pdev_rcl1WindowClip.yBottom ; yBottom as read after the call
1405: sub eax, ebx ; eax = # scans can do in bank
1406:
1407: add edi, [esi].pdev_pvBitmapStart ; rebuild the pointer from pvBitmapStart as read after the call
1408: mov strip.ST_pjScreen,edi
1409:
1410: done_strip_bank_switch: ; Recompute plStripEnd, reload the DDA registers and resume the loop
1411:
1412: ; eax = cStripsInNextRun
1413:
1414: lea edx, [strip.ST_alStrips + (STRIP_MAX * 4)] ; address just past STRIP_MAX dword entries
1415: sub eax, STRIP_MAX
1416: mov cStripsInNextRun, eax ; strips left after this buffer
1417: jge short get_ready_for_more_strips ; at least STRIP_MAX strips: use the whole buffer
1418: lea edx, [edx + eax * 4] ; eax < 0 here: end the buffer early
1419:
1420: get_ready_for_more_strips:
1421: mov plStripEnd, edx
1422:
1423: mov esi, i ; reload the state spilled in output_strips
1424: mov edi, r
1425: mov ebx, cPels
1426: mov edx, d_R
1427: mov ecx, dN
1428: lea eax, [strip.ST_alStrips] ; start filling the strip array from the beginning again
1429: jmp done_output_strips
1430:
1431: ;-----------------------------------------------------------------------;
1432: ; Empty strips buffer. Either get new line or do y-major bank switch. ;
1433: ;-----------------------------------------------------------------------;
1434:
1435: final_strip: ; Store the last (possibly shortened) strip, then draw or switch banks
1436: add ebx, esi ; undo the subtract in dda_loop: ebx = pels left for this strip
1437: mov [eax], ebx
1438: add eax, 4
1439:
1440: cmp cPelsAfterThisBank, 0
1441: jg short bank_y_major ; pels remain beyond this bank
1442:
1443: very_final_strip:
1444: lea edx, [strip]
1445: mov ecx, pls
1446:
1447: ptrCall <dword ptr pfn>, \
1448: <edx, ecx, eax>
1449:
1450: ; NOTE: next_line is jumped to from various places, and it cannot assume
1451: ; any registers are loaded.
1452:
1453: next_line: ; Continue complex clipping, load the next point pair, or return
1454: mov ebx, flStart
1455: testb ebx, FL_COMPLEX_CLIP
1456: jnz short see_if_done_complex_clipping
1457:
1458: mov edx, pptfxBuf
1459: cmp edx, pptfxBufEnd
1460: je short all_done ; pptfxBuf reached pptfxBufEnd: finished
1461:
1462: mov esi, [edx].ptl_x ; esi, ecx = x, y of the point at pptfxBuf
1463: mov ecx, [edx].ptl_y
1464: add edx, size POINTL
1465: mov pptfxBuf, edx ; advance by one POINTL
1466: mov eax, [edx].ptl_x ; eax, edi = x, y of the following point
1467: mov edi, [edx].ptl_y
1468: jmp the_main_loop
1469:
1470: all_done:
1471: mov eax, 1 ; return value 1
1472:
1473: cRet bLines
1474:
1475: see_if_done_complex_clipping: ; Count down cptfx; finish at zero, else start the next run
1476: mov ebx, fl
1477: dec cptfx
1478: jz short all_done ; counter reached zero
1479:
1480: and ebx, NOT FL_FLIP_HALF ; Make sure the next run doesn't have
1481: mov fl, ebx ; to do a half-flip if it doesn't
1482: ; want to
1483: jmp continue_complex_clipping
1484:
1485: ;-----------------------------------------------------------------------;
1486: ; Switch banks for a y-major line. ;
1487: ;-----------------------------------------------------------------------;
1488:
1489: public bank_y_major
1490: bank_y_major: ; y-major line crossing a bank: spill DDA state, draw, then switch banks
1491: mov d_R, edx ; save loop state; edi, edx, ecx are reloaded at bank_y_get_ready
1492: mov i, esi
1493: mov r, edi
1494: mov dN, ecx
1495: sub ebx, esi ; Undo our offset (final_strip added esi to ebx)
1496:
1497: bank_y_output_strips:
1498: lea edx, [strip]
1499: mov ecx, pls
1500:
1501: ptrCall <dword ptr pfn>, \
1502: <edx, ecx, eax>
1503:
1504: mov esi, ppdev
1505: test fl, FL_FLIP_V
1506: jz short bank_y_down
1507:
1508: bank_y_up: ; y-major line going up: map in the bank holding scan yTop - 1
1509: mov edi, strip.ST_pjScreen
1510: sub edi, [esi].pdev_pvBitmapStart ; edi = pjScreen as an offset from pvBitmapStart
1511: mov ecx, [esi].pdev_rcl1WindowClip.yTop
1512: push ecx ; keep yTop from before the call
1513: dec ecx ; we want yTop - 1 to be mapped in
1514:
1515: ; Map in the next higher bank:
1516:
1517: ptrCall <dword ptr [esi].pdev_pfnBankControl>, \
1518: <esi, ecx, JustifyBottom>; ebx, esi and edi are preserved
1519:
1520: pop ecx ; ecx = yTop from before the call
1521: sub ecx, [esi].pdev_rcl1WindowClip.yTop ; minus yTop as read after the call
1522: ; ecx = # of scans can do in bank
1523:
1524: add edi, [esi].pdev_pvBitmapStart ; rebuild the pointer from pvBitmapStart as read after the call
1525: mov strip.ST_pjScreen, edi
1526:
1527: mov edx, cPelsAfterThisBank ; edx = cPelsAfterBank
1528: lea eax, [strip.ST_alStrips] ; eax = plStrip
1529: or ebx, ebx ; ebx = cPels
1530: jge bank_y_done_partial_strip ; ebx >= 0: no strip left unfinished
1531: jmp short bank_y_done_switch
1532:
1533: bank_y_down: ; y-major line going down: map in the bank holding scan yBottom
1534: mov edi, strip.ST_pjScreen
1535: sub edi, [esi].pdev_pvBitmapStart ; edi = pjScreen as an offset from pvBitmapStart
1536: mov ecx, [esi].pdev_rcl1WindowClip.yBottom
1537: push ecx ; keep yBottom from before the call
1538:
1539: ; Map in the next lower bank:
1540:
1541: ptrCall <dword ptr [esi].pdev_pfnBankControl>, \
1542: <esi, ecx, JustifyTop> ; ebx, esi and edi are preserved
1543:
1544: pop eax ; eax = yBottom from before the call
1545: mov ecx, [esi].pdev_rcl1WindowClip.yBottom ; yBottom as read after the call
1546: sub ecx, eax ; ecx = # scans can do in bank
1547:
1548: add edi, [esi].pdev_pvBitmapStart ; rebuild the pointer from pvBitmapStart as read after the call
1549: mov strip.ST_pjScreen,edi
1550:
1551: mov edx, cPelsAfterThisBank ; edx = cPelsAfterBank
1552: lea eax, [strip.ST_alStrips] ; eax = plStrip
1553: or ebx, ebx ; ebx = cPels
1554: jge short bank_y_done_partial_strip ; ebx >= 0: no strip left unfinished
1555:
1556: bank_y_done_switch: ; ebx < 0 here: continue a strip that was cut off at the bank edge
1557:
1558: ; Handle a single strip stretching over multiple banks:
1559:
1560: test fl, FL_FLIP_HALF
1561: jz short bank_y_no_half_flip
1562:
1563: ; We now have to adjust for the fact that the strip drawers always leave
1564: ; the state ready for the next new strip (e.g., if we're doing vertical
1565: ; strips, it advances pjScreen one to the right after drawing each strip).
1566: ; But the problem is that since we crossed a bank, we have to continue the
1567: ; *old* strip, so we have to undo that advance:
1568:
1569: bank_y_half_flip:
1570: inc strip.ST_pjScreen
1571: jmp short bank_y_done_bit_adjust
1572:
1573: bank_y_no_half_flip:
1574: dec strip.ST_pjScreen
1575:
1576: bank_y_done_bit_adjust:
1577: mov esi, ebx
1578: neg esi ; esi = # pels left in strip
1579:
1580: ; eax = pointer to first strip entry
1581: ; ebx = negative esi
1582: ; ecx = # of pels we can put down in this window
1583: ; edx = # of pels remaining to do in line
1584: ; esi = # of pels left in strip
1585:
1586: ; We have three special cases to check here:
1587: ;
1588: ; 1) If the strip spans the entire next window
1589: ; 2) This is the last strip in the line
1590: ; 3) Neither of the above
1591:
1592: cmp edx,ecx ;if line is no longer than bank,
1593: jle short bank_y_check_if_last_strip; know strip doesn't span bank
1594:
1595: cmp esi,ecx ;if line spans bank, don't have
1596: jl short bank_y_continue_strip ; to check if last strip
1597:
1598: ; If ((# of pels in line > window size) && (# of pels in strip >= window size))
1599: ; then the strip spans this bank:
1600:
1601: mov [eax], ecx ; one strip entry covering the whole window
1602: add eax, 4
1603: add ebx, ecx ; that many fewer pels left in the strip (ebx = -pels left)
1604: sub edx, ecx ; and in the line
1605: mov cPelsAfterThisBank, edx
1606: jmp bank_y_output_strips ; draw it and switch banks again
1607:
1608: bank_y_check_if_last_strip: ; The line ends in this bank: is this strip its last one?
1609: cmp esi, edx ;if strip is shorter than line,
1610: jl short bank_y_continue_strip ; we know this isn't the last
1611: ; strip
1612:
1613: ; Handle case where this is the last strip in the line and it overlaps a bank:
1614:
1615: mov [eax], edx ; strip entry = pels remaining in the line
1616: add eax, 4
1617: jmp very_final_strip
1618:
1619: bank_y_continue_strip: ; Queue the rest of the interrupted strip, then resume the DDA
1620: mov [eax], esi ; strip entry = pels left in the strip
1621: add eax, 4
1622:
1623: bank_y_done_partial_strip:
1624: add ebx, edx ; cPels += cPelsAfterThisBank
1625: sub edx, ecx ; cPelsAfterThisBank -= cyWindow
1626:
1627: jle short bank_y_get_ready ; nothing left beyond this window
1628: sub ebx, edx ; else take the excess back out of cPels
1629:
1630: bank_y_get_ready:
1631: mov cPelsAfterThisBank, edx
1632: mov edi, r ; reload the state spilled in bank_y_major
1633: mov edx, d_R
1634: mov ecx, dN
1635: jmp done_output_strips ; esi is reloaded from d_I there
1636:
1637: ;---------------------------Private-Routine-----------------------------;
1638: ; do_some_styling
1639: ;
1640: ; Inputs:
1641: ; eax = ptlStart.ptl_y
1642: ; ebx = fl
1643: ; ecx = ptlStart.ptl_x
1644: ; Preserves:
1645: ; eax, ebx, ecx
1646: ; Output:
1647: ; Exits to done_styling.
1648: ;
1649: ;-----------------------------------------------------------------------;
1650:
1651: public do_some_styling
1652: do_some_styling: ; Advance the style position and find where in the style the line starts
1653: mov esi, pls
1654: mov ptlStart.ptl_x, ecx ; save ecx; reloaded at done_arbitrary
1655:
1656: mov edi, [esi].LS_spNext ; spThis
1657: mov edx, edi
1658: add edx, cStylePels ; spNext
1659:
1660: do_non_alternate_style:
1661:
1662: ; For styles, we don't bother to keep the style position normalized.
1663: ; (we do ensure that it's positive, though). If a figure is over 2
1664: ; billion pels long, we'll be a pel off in our style state (oops!).
1665:
1666: and edx, 7fffffffh ; clear the sign bit
1667: mov [esi].LS_spNext, edx
1668: mov ptlStart.ptl_y, eax ; save eax; reloaded at done_arbitrary
1669:
1670: testb ebx, FL_FLIP_H
1671: jz short arbitrary_left_to_right
1672:
1673: sub edx, x0
1674: add edx, xStart ; edx = spNext - x0 + xStart
1675: mov eax, edx
1676: xor edx, edx ; zero the high dword before the divide
1677: div [esi].LS_spTotal ; eax = quotient, edx = remainder
1678:
1679: neg edx
1680: jge short continue_right_to_left ; remainder was zero
1681: add edx, [esi].LS_spTotal ; edx = spTotal - remainder
1682: not eax
1683:
1684: continue_right_to_left:
1685: mov edi, dword ptr [esi].LS_bStartIsGap
1686: not edi ; inverted for the right-to-left case
1687: mov ecx, [esi].LS_aspRtoL
1688: jmp short compute_arbitrary_stuff
1689:
1690: arbitrary_left_to_right:
1691: add edi, x0
1692: sub edi, xStart ; edi = spThis + x0 - xStart
1693: mov eax, edi
1694: xor edx, edx ; zero the high dword before the divide
1695: div [esi].LS_spTotal ; eax = quotient, edx = remainder
1696: mov edi, dword ptr [esi].LS_bStartIsGap
1697: mov ecx, [esi].LS_aspLtoR
1698:
1699: compute_arbitrary_stuff: ; Find the style entry containing sp and fill in the strip's style fields
1700: ; eax = sp / spTotal
1701: ; ebx = fl
1702: ; ecx = pspStart
1703: ; edx = sp % spTotal
1704: ; esi = pls
1705: ; edi = bIsGap
1706:
1707: and eax, [esi].LS_cStyle ; if odd length style and second run
1708: and al, 1 ; through style array, flip the
1709: jz short odd_style_array_done ; meaning of the elements
1710: not edi
1711:
1712: odd_style_array_done:
1713: mov eax, [esi].LS_cStyle
1714: mov strip.ST_pspStart, ecx
1715: lea eax, [ecx + eax * 4 - 4] ; address of entry cStyle - 1 (4 bytes per entry)
1716: mov strip.ST_pspEnd, eax
1717:
1718: find_psp:
1719: sub edx, [ecx] ; subtract entries until the running value goes negative
1720: jl short found_psp
1721: add ecx, 4
1722: jmp short find_psp
1723:
1724: found_psp:
1725: mov strip.ST_psp, ecx
1726: neg edx ; edx = amount left in the entry found
1727: mov strip.ST_spRemaining, edx
1728:
1729: sub ecx, strip.ST_pspStart ; byte offset of that entry from pspStart
1730: test ecx, 4 ; size STYLEPOS: is the entry index odd?
1731: jz short done_arbitrary
1732: not edi ; odd index: flip bIsGap
1733:
1734: done_arbitrary:
1735: mov dword ptr strip.ST_bIsGap, edi
1736: mov eax, ptlStart.ptl_y ; reload eax and ecx saved on entry to do_some_styling
1737: mov ecx, ptlStart.ptl_x
1738: jmp done_styling
1739:
1740: ;---------------------------Private-Routine-----------------------------;
1741: ; do_some_clipping
1742: ;
1743: ; Inputs:
1744: ; eax = garbage
1745: ; ebx = fl
1746: ; ecx = x0
1747: ; edx = garbage
1748: ; esi = x1
1749: ; edi = garbage
1750: ;
1751: ; Decides whether to do simple or complex clipping.
1752: ;
1753: ;-----------------------------------------------------------------------;
1754:
1755: align 4
1756:
1757: public do_some_clipping
1758: do_some_clipping: ; Dispatch on FL_COMPLEX_CLIP
1759: testb ebx, FL_COMPLEX_CLIP
1760: jnz initialize_complex_clipping ; otherwise fall through into simple_clipping
1761:
1762: ;-----------------------------------------------------------------------;
1763: ; simple_clipping
1764: ;
1765: ; Inputs:
1766: ; ebx = fl
1767: ; ecx = x0
1768: ; esi = x1
1769: ; Output:
1770: ; ebx = fl
1771: ; ecx = new x0 (stack variable updated too)
1772: ; esi = new x1
1773: ; y0 stack variable updated
1774: ; Uses:
1775: ; All registers
1776: ; Exits:
1777: ; to done_clipping
1778: ;
1779: ; This routine handles clipping the line to the clip rectangle (it's
1780: ; faster to handle this case in the driver than to call the engine to
1781: ; clip for us).
1782: ;
1783: ; Fractional end-point lines complicate our lives a bit when doing
1784: ; clipping:
1785: ;
1786: ; 1) For styling, we must know the unclipped line's length in pels, so
1787: ; that we can correctly update the styling state when the line is
1788: ; clipped. For this reason, I do clipping after doing the hard work
1789: ; of figuring out which pixels are at the ends of the line (this is
1790: ; wasted work if the line is not styled and is completely clipped,
1791: ; but I think it's simpler this way). Another reason is that we'll
1792: ; have calculated eqGamma already, which we use for the intercept
1793: ; calculations.
1794: ;
1795: ; With the assumption that most lines will not be completely clipped
1796: ; away, this strategy isn't too painful.
1797: ;
1798: ; 2) x0, y0 are not necessarily zero, where (x0, y0) is the start pel of
1799: ; the line.
1800: ;
1801: ; 3) We know x0, y0 and x1, but not y1. We haven't needed to calculate
1802: ; y1 until now. We'll need the actual value, and not an upper bound
1803: ; like y1 = LFLOOR(dM) + 2 because we have to be careful when
1804: ; calculating x(y) that y0 <= y <= y1, otherwise we can cause an
1805: ; overflow on the divide (which, needless to say, is bad).
1806: ;
1807: ;-----------------------------------------------------------------------;
1808:
1809: public simple_clipping
1810: simple_clipping: ; Clip against the right and bottom edges of the selected rectangle
1811: mov edi, prclClip ; get pointer to normalized clip rect
1812: and ebx, FL_RECTLCLIP_MASK ; (it's lower-right exclusive)
1813:
1814: .errnz (FL_RECTLCLIP_SHIFT - 2); ((ebx AND FL_RECTLCLIP_MASK) shr
1815: .errnz (size RECTL) - 16 ; FL_RECTLCLIP_SHIFT) is our index
1816: lea edi, [edi + ebx*4] ; into the array of rectangles
1817:
1818: mov edx, [edi].xRight ; load the rect coordinates
1819: mov eax, [edi].xLeft
1820: mov ebx, [edi].yBottom
1821: mov edi, [edi].yTop
1822:
1823: ; Translate to our origin and do some quick completely-clipped tests:
1824:
1825: sub edx, x
1826: cmp ecx, edx
1827: jge totally_clipped ; totally clipped if x0 >= xRight
1828:
1829: sub eax, x
1830: cmp esi, eax
1831: jl totally_clipped ; totally clipped if x1 < xLeft
1832:
1833: sub ebx, y
1834: cmp y0, ebx
1835: jge totally_clipped ; totally clipped if y0 >= yBottom
1836:
1837: sub edi, y
1838:
1839: ; Save some state:
1840:
1841: mov xClipRight, edx
1842: mov xClipLeft, eax
1843:
1844: cmp esi, edx ; if (x1 >= xRight) x1 = xRight - 1
1845: jl short calculate_y1
1846: lea esi, [edx - 1]
1847:
1848: calculate_y1:
1849: mov eax, esi ; y1 = (x1 * dN + eqGamma) / dM
1850: mul dN
1851: add eax, eqGamma_lo
1852: adc edx, eqGamma_hi
1853: div dM
1854:
1855: cmp edi, eax ; if (yTop > y1) clipped
1856: jg short totally_clipped
1857:
1858: cmp ebx, eax ; if (yBottom > y1) know x1
1859: jg short x1_computed
1860:
1861: mov eax, ebx ; x1 = (yBottom * dM + eqBeta) / dN
1862: mul dM
1863: stc ; carry supplies the -1
1864: sbb eax, eqGamma_lo
1865: sbb edx, eqGamma_hi ; edx:eax = yBottom * dM - eqGamma - 1
1866: div dN
1867: mov esi, eax
1868:
1869: ; At this point, we've taken care of calculating the intercepts with the
1870: ; right and bottom edges. Now we work on the left and top edges:
1871:
1872: x1_computed: ; Right and bottom edges done; now clip against the left and top edges
1873: mov edx, y0
1874:
1875: mov eax, xClipLeft ; don't have to compute y intercept
1876: cmp eax, ecx ; at left edge if line starts to
1877: jle short top_intercept ; right of left edge
1878:
1879: mov ecx, eax ; x0 = xLeft
1880: mul dN ; y0 = (xLeft * dN + eqGamma) / dM
1881: add eax, eqGamma_lo
1882: adc edx, eqGamma_hi
1883: div dM
1884:
1885: cmp ebx, eax ; if (yBottom <= y0) clipped
1886: jle short totally_clipped
1887:
1888: mov edx, eax ; edx = new y0 for the top test below
1889: mov y0, eax
1890:
1891: top_intercept:
1892: mov ebx, fl ; get ready to leave
1893: mov x0, ecx
1894:
1895: cmp edi, edx ; if (yTop <= y0) done clipping
1896: jle done_clipping
1897:
1898: mov eax, edi ; x0 = (yTop * dM + eqBeta) / dN + 1
1899: mul dM
1900: stc ; carry supplies the -1
1901: sbb eax, eqGamma_lo
1902: sbb edx, eqGamma_hi ; edx:eax = yTop * dM - eqGamma - 1
1903: div dN
1904: lea ecx, [eax + 1]
1905:
1906: cmp xClipRight, ecx ; if (xRight <= x0) clipped
1907: jle short totally_clipped
1908:
1909: mov y0, edi ; y0 = yTop
1910: mov x0, ecx
1911: jmp done_clipping ; all done!
1912:
1913: totally_clipped: ; Nothing to draw; for styled lines still advance LS_spNext by cStylePels
1914:
1915: ; The line is completely clipped. See if we have to update our style state:
1916:
1917: mov ebx, fl
1918: testb ebx, FL_STYLED
1919: jz next_line
1920:
1921: ; Adjust our style state:
1922:
1923: mov esi, pls
1924: mov eax, [esi].LS_spNext
1925: add eax, cStylePels
1926: mov [esi].LS_spNext, eax
1927:
1928: cmp eax, [esi].LS_spTotal2
1929: jb next_line ; below LS_spTotal2 (unsigned): no normalizing needed
1930:
1931: ; Have to normalize first:
1932:
1933: xor edx, edx ; zero the high dword before the divide
1934: div [esi].LS_spTotal2
1935: mov [esi].LS_spNext, edx ; keep the remainder
1936:
1937: jmp next_line
1938:
1939: ;-----------------------------------------------------------------------;
1940:
1941: initialize_complex_clipping: ; Save dN, then fall through into continue_complex_clipping
1942: mov eax, dN ; save a copy of original dN
1943: mov dN_Original, eax ; restored into dN for every run
1944:
1945: ;---------------------------Private-Routine-----------------------------;
1946: ; continue_complex_clipping
1947: ;
1948: ; Inputs:
1949: ; ebx = fl
1950: ; Output:
1951: ; ebx = fl
1952: ; ecx = x0
1953: ; esi = x1
1954: ; Uses:
1955: ; All registers.
1956: ; Exits:
1957: ; to done_clipping
1958: ;
1959: ; This routine handles the necessary initialization for the next
1960: ; run in the CLIPLINE structure.
1961: ;
1962: ; NOTE: This routine is jumped to from two places!
1963: ;-----------------------------------------------------------------------;
1964:
1965: public continue_complex_clipping
1966: continue_complex_clipping: ; Set x0, x1 and y0 for the RUN at prun, then advance prun
1967: mov edi, prun
1968: mov ecx, xStart
1969: testb ebx, FL_FLIP_H
1970: jz short complex_left_to_right
1971:
1972: complex_right_to_left:
1973:
1974: ; Figure out x0 and x1 for right-to-left lines:
1975:
1976: add ecx, cStylePels
1977: dec ecx
1978: mov esi, ecx ; esi = ecx = xStart + cStylePels - 1
1979: sub ecx, [edi].RUN_iStop ; New x0
1980: sub esi, [edi].RUN_iStart ; New x1
1981: jmp short complex_reset_variables
1982:
1983: complex_left_to_right:
1984:
1985: ; Figure out x0 and x1 for left-to-right lines:
1986:
1987: mov esi, ecx ; esi = ecx = xStart
1988: add ecx, [edi].RUN_iStart ; New x0
1989: add esi, [edi].RUN_iStop ; New x1
1990:
1991: complex_reset_variables:
1992: mov x0, ecx
1993:
1994: ; The half flip mucks with some of our variables, and we have to reset
1995: ; them every pass. We would have to reset eqGamma too, but it never
1996: ; got saved to memory in its modified form.
1997:
1998: add edi, size RUN
1999: mov prun, edi ; Increment run pointer for next time
2000:
2001: mov edi, pls
2002: mov eax, [edi].LS_spComplex
2003: mov [edi].LS_spNext, eax ; pls->spNext = pls->spComplex
2004:
2005: mov eax, dN_Original ; dN = dN_Original
2006: mov dN, eax
2007:
2008: mul ecx ; edx:eax = dN * x0
2009: add eax, eqGamma_lo
2010: adc edx, eqGamma_hi ; [edx:eax] = dN*x0 + eqGamma
2011:
2012: div dM
2013: mov y0, eax ; y0 = (dN * x0 + eqGamma) / dM
2014: jmp done_clipping
2015:
2016: endProc bLines
2017:
2018: end
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.