|
|
1.1 root 1: ;---------------------------Module-Header------------------------------;
2: ; Module Name: lines.asm
3: ;
4: ; Draws a set of connected polylines. Initialization for the device
5: ; or bitmap has already been done in the stroke routine. Solid and
6: ; styled lines are handled for both the device and bitmaps. Banking
7: ; for the display is handled.
8: ;
9: ; The code is different depending on whether we are drawing solid
10: ; lines, styled lines with common styles, or lines with completely
11: ; arbitrary styles.
12: ;
13: ; There are sixteen raster operations (sets of logical operations)
14: ; performed on the data written out. When writing to the VGA there are
15: ; four of these operations which take two passes of VGA memory. In
16: ; each of these cases the first pass inverts the necessary bits in the
17: ; necessary planes. The second pass then performs the rest of the
18: ; raster operation. The other twelve raster operations can be done in
19: ; one pass of VGA memory. All raster operations are done in one pass of
20: ; memory for bitmaps. Depending on the raster operation and the color
21: ; of the pen, it is easily determined whether we set bits to zeros, set
22: ; bits to ones, invert bits or do nothing. Bitmaps are written to one
23: ; plane at a time.
24: ;
25: ; Lines are drawn from left to right. So if a line moves from right
26: ; to left, the endpoints are swapped and the line is drawn from left to
27: ; right.
28: ;
29: ; Copyright (c) 1992 Microsoft Corporation
30: ;-----------------------------------------------------------------------;
31:
32: .386
33:
34: .model small,c
35:
36: assume cs:FLAT,ds:FLAT,es:FLAT,ss:FLAT
37: assume fs:nothing,gs:nothing
38:
39: .xlist
40: include stdcall.inc ;calling convention cmacros
41: include i386\egavga.inc
42: include i386\strucs.inc
43: include i386\lines.inc
44: .list
45:
46: .data
47:
48: public gaflRoundTable
49: gaflRoundTable label dword ; rounding flags OR'ed into fl at done_flips; (fl AND FL_ROUND_MASK) is used directly as the byte offset
50: dd FL_H_ROUND_DOWN + FL_V_ROUND_DOWN ; no flips
51: dd FL_H_ROUND_DOWN + FL_V_ROUND_DOWN ; D flip
52: dd FL_H_ROUND_DOWN ; V flip
53: dd FL_V_ROUND_DOWN ; D & V flip
54: dd FL_V_ROUND_DOWN ; slope one
55: dd 0baadf00dh ; presumably slope one & D flip -- filler; the flip code sets one or the other, never both
56: dd FL_H_ROUND_DOWN ; slope one & V flip
57: dd 0baadf00dh ; presumably slope one & D & V flip -- filler, same reason
58:
59: .code
60:
61: _TEXT$03 SEGMENT DWORD USE32 PUBLIC 'CODE'
62: ASSUME CS:FLAT, DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING
63:
64: ;--------------------------------Macro----------------------------------;
65: ; testb ebx, <mask>
66: ;
67: ; Emits a byte TEST of bl (or bh) when all mask bits lie in bits 0-7 (or
68: ; bits 8-15), thus saving 3 bytes of code space; otherwise emits a dword
69: ; TEST of the target. The byte forms are only generated for target EBX.
70: ;-----------------------------------------------------------------------;
71:
72: TESTB macro targ,mask,thirdarg
73: local mask2,delta
74:
75: ifnb <thirdarg> ; any third argument is rejected with the message below
76: .err TESTB mask must be enclosed in brackets!
77: endif
78:
79: delta = 0 ; 0 selects bl, 1 selects bh
80: mask2 = mask
81:
82: if mask2 AND 0ffff0000h
83: test targ,mask ; If bit set in hi-word,
84: exitm ; test entire dword
85: endif
86:
87: if mask2 AND 0ff00h
88: if mask2 AND 0ffh ; If bit set in lo-byte and
89: test targ,mask ; hi-byte, test entire dword
90: exitm
91: endif
92:
93: mask2 = mask2 SHR 8 ; mask lies entirely in bits 8-15: shift it down to a byte value
94: delta = 1 ; and test it against bh
95: endif
96:
97: ifidni <targ>,<EBX> ; case-insensitive text match on the target operand
98: if delta
99: test bh,mask2
100: else
101: test bl,mask2
102: endif
103: exitm
104: endif
105:
106: .err Too bad TESTB doesn't support targets other than ebx!
107: endm
108:
109: ;---------------------------Public-Routine------------------------------;
110: ; bLines(pdsurf, pptfxFirst, pptfxBuf, prun, cptfx, pls,
111: ; prclClip, apfn[], flStart)
112: ;
113: ; Do all the DDA calculations for lines.
114: ;
115: ; Doing Lines Right
116: ; -----------------
117: ;
118: ; In NT, all lines are given to the device driver in fractional
119: ; coordinates, in a 28.4 fixed point format. The lower 4 bits are
120: ; fractional for sub-pixel positioning.
121: ;
122: ; Note that you CANNOT! just round the coordinates to integers
123: ; and pass the results to your favorite integer Bresenham routine!!
124: ; (Unless, of course, you have such a high resolution device that
125: ; nobody will notice -- not likely for a display device.) The
126: ; fractions give a more accurate rendering of the line -- this is
127: ; important for things like our Bezier curves, which would have 'kinks'
128: ; if the points in its polyline approximation were rounded to integers.
129: ;
130: ; Unfortunately, for fractional lines there is more setup work to do
131: ; a DDA than for integer lines. However, the main loop is exactly
132: ; the same (and can be done entirely with 32 bit math).
133: ;
134: ; If You've Got Hardware That Does Bresenham
135: ; ------------------------------------------
136: ;
137: ; A lot of hardware limits DDA error terms to 'n' bits. With fractional
138: ; coordinates, 4 bits are given to the fractional part, letting
139: ; you draw in hardware only those lines that lie entirely in a 2^(n-4)
140: ; by 2^(n-4) pixel space.
141: ;
142: ; And you still have to correctly draw those lines with coordinates
143: ; outside that space! Remember that the screen is only a viewport
144: ; onto a 28.4 by 28.4 space -- if any part of the line is visible
145: ; you MUST render it precisely, regardless of where the end points lie.
146: ; So even if you do it in software, somewhere you'll have to have a
147: ; 32 bit DDA routine.
148: ;
149: ; Our Implementation
150: ; ------------------
151: ;
152: ; We employ a run length slice algorithm: our DDA calculates the
153: ; number of pixels that are in each row (or 'strip') of pixels.
154: ;
155: ; We've separated the running of the DDA and the drawing of pixels:
156: ; we run the DDA for several iterations and store the results in
157: ; a 'strip' buffer (which are the lengths of consecutive pixel rows of
158: ; the line), then we crank up a 'strip drawer' that will draw all the
159: ; strips in the buffer.
160: ;
161: ; We also employ a 'half-flip' to reduce the number of strip
162: ; iterations we need to do in the DDA and strip drawing loops: when a
163: ; (normalized) line's slope is more than 1/2, we do a final flip
164: ; about the line y = (1/2)x. So now, instead of each strip being
165: ; consecutive horizontal or vertical pixel rows, each strip is composed
166: ; of those pixels aligned in 45 degree rows. So a line like (0, 0) to
167: ; (128, 128) would generate only one strip.
168: ;
169: ; We also always draw only left-to-right.
170: ;
171: ; Style lines may have arbitrary style patterns. We specially
172: ; optimize the default patterns (and call them 'masked' styles).
173: ;
174: ; The DDA Derivation
175: ; ------------------
176: ;
177: ; Here is how I like to think of the DDA calculation.
178: ;
179: ; We employ Knuth's "diamond rule": rendering a one-pixel-wide line
180: ; can be thought of as dragging a one-pixel-wide by one-pixel-high
181: ; diamond along the true line. Pixel centers lie on the integer
182: ; coordinates, and so we light any pixel whose center gets covered
183: ; by the "drag" region (John D. Hobby, Journal of the Association
184: ; for Computing Machinery, Vol. 36, No. 2, April 1989, pp. 209-229).
185: ;
186: ; We must define which pixel gets lit when the true line falls
187: ; exactly half-way between two pixels. In this case, we follow
188: ; the rule: when two pels are equidistant, the upper or left pel
189: ; is illuminated, unless the slope is exactly one, in which case
190: ; the upper or right pel is illuminated. (So we make the edges
191: ; of the diamond exclusive, except for the top and left vertices,
192: ; which are inclusive, unless we have slope one.)
193: ;
194: ; This metric decides what pixels should be on any line BEFORE it is
195: ; flipped around for our calculation. Having a consistent metric
196: ; this way will let our lines blend nicely with our curves. The
197: ; metric also dictates that we will never have one pixel turned on
198: ; directly above another that's turned on. We will also never have
199: ; a gap; i.e., there will be exactly one pixel turned on for each
200: ; column between the start and end points. All that remains to be
201: ; done is to decide how many pixels should be turned on for each row.
202: ;
203: ; So lines we draw will consist of varying numbers of pixels on
204: ; successive rows, for example:
205: ;
206: ; ******
207: ; *****
208: ; ******
209: ; *****
210: ;
211: ; We'll call each set of pixels on a row a "strip".
212: ;
213: ; (Please remember that our coordinate space has the origin as the
214: ; upper left pixel on the screen; positive y is down and positive x
215: ; is right.)
216: ;
217: ; Device coordinates are specified as fixed point 28.4 numbers,
218: ; where the first 28 bits are the integer coordinate, and the last
219: ; 4 bits are the fraction. So coordinates may be thought of as
220: ; having the form (x, y) = (M/F, N/F) where F is the constant scaling
221: ; factor F = 2^4 = 16, and M and N are 32 bit integers.
222: ;
223: ; Consider the line from (M0/F, N0/F) to (M1/F, N1/F) which runs
224: ; left-to-right and whose slope is in the first octant, and let
225: ; dM = M1 - M0 and dN = N1 - N0. Then dM >= 0, dN >= 0 and dM >= dN.
226: ;
227: ; Since the slope of the line is less than 1, the edges of the
228: ; drag region are created by the top and bottom vertices of the
229: ; diamond. At any given pixel row y of the line, we light those
230: ; pixels whose centers are between the left and right edges.
231: ;
232: ; Let mL(n) denote the line representing the left edge of the drag
233: ; region. On pixel row j, the column of the first pixel to be
234: ; lit is
235: ;
236: ; iL(j) = ceiling( mL(j * F) / F)
237: ;
238: ; Since the line's slope is less than one:
239: ;
240: ; iL(j) = ceiling( mL([j + 1/2] F) / F )
241: ;
242: ; Recall the formula for our line:
243: ;
244: ; n(m) = (dN / dM) (m - M0) + N0
245: ;
246: ; m(n) = (dM / dN) (n - N0) + M0
247: ;
248: ; Since the line's slope is less than one, the line representing
249: ; the left edge of the drag region is the original line offset
250: ; by 1/2 pixel in the y direction:
251: ;
252: ; mL(n) = (dM / dN) (n - F/2 - N0) + M0
253: ;
254: ; From this we can figure out the column of the first pixel that
255: ; will be lit on row j, being careful of rounding (if the left
256: ; edge lands exactly on an integer point, the pixel at that
257: ; point is not lit because of our rounding convention):
258: ;
259: ; iL(j) = floor( mL(j F) / F ) + 1
260: ;
261: ; = floor( ((dM / dN) (j F - F/2 - N0) + M0) / F ) + 1
262: ;
263: ; = floor( (F dM j - F/2 dM - N0 dM + dN M0) / (F dN) ) + 1
264: ;
265: ; F dM j - [ dM (N0 + F/2) - dN M0 ]
266: ; = floor( ---------------------------------- ) + 1
267: ; F dN
268: ;
269: ; dM j - [ dM (N0 + F/2) - dN M0 ] / F
270: ; = floor( ------------------------------------ ) + 1 (1)
271: ; dN
272: ;
273: ; = floor( (dM j + alpha) / dN ) + 1
274: ;
275: ; where
276: ;
277: ; alpha = - [ dM (N0 + F/2) - dN M0 ] / F
278: ;
279: ; We use equation (1) to calculate the DDA: there are iL(j+1) - iL(j)
280: ; pixels in row j. Because we are always calculating iL(j) for
281: ; integer quantities of j, we note that the only fractional term
282: ; is constant, and so we can 'throw away' the fractional bits of
283: ; alpha:
284: ;
285: ; beta = floor( - [ dM (N0 + F/2) - dN M0 ] / F ) (2)
286: ;
287: ; so
288: ;
289: ; iL(j) = floor( (dM j + beta) / dN ) + 1 (3)
290: ;
291: ; for integers j.
292: ;
293: ; Note if iR(j) is the line's rightmost pixel on row j, that
294: ; iR(j) = iL(j + 1) - 1.
295: ;
296: ; Similarly, rewriting equation (1) as a function of column i,
297: ; we can determine, given column i, on which pixel row j is the line
298: ; lit:
299: ;
300: ; dN i + [ dM (N0 + F/2) - dN M0 ] / F
301: ; j(i) = ceiling( ------------------------------------ ) - 1
302: ; dM
303: ;
304: ; Floors are easier to compute, so we can rewrite this:
305: ;
306: ; dN i + [ dM (N0 + F/2) - dN M0 ] / F + dM - 1/F
307: ; j(i) = floor( ----------------------------------------------- ) - 1
308: ; dM
309: ;
310: ; dN i + [ dM (N0 + F/2) - dN M0 ] / F + dM - 1/F - dM
311: ; = floor( ---------------------------------------------------- )
312: ; dM
313: ;
314: ; dN i + [ dM (N0 + F/2) - dN M0 - 1 ] / F
315: ; = floor( ---------------------------------------- )
316: ; dM
317: ;
318: ; We can once again wave our hands and throw away the fractional bits
319: ; of the remainder term:
320: ;
321: ; j(i) = floor( (dN i + gamma) / dM ) (4)
322: ;
323: ; where
324: ;
325: ; gamma = floor( [ dM (N0 + F/2) - dN M0 - 1 ] / F ) (5)
326: ;
327: ; We now note that
328: ;
329: ; beta = -gamma - 1 = ~gamma (6)
330: ;
331: ; To draw the pixels of the line, we could evaluate (3) on every scan
332: ; line to determine where the strip starts. Of course, we don't want
333: ; to do that because that would involve a multiply and divide for every
334: ; scan. So we do everything incrementally.
335: ;
336: ; We would like to easily compute c_j, the number of pixels on scan j:
337: ;
338: ;
339: ; c_j = iL(j + 1) - iL(j)
340: ;
341: ;
342: ; = floor((dM (j + 1) + beta) / dN) - floor((dM j + beta) / dN) (7)
343: ;
344: ; This may be rewritten as
345: ;
346: ; c_j = floor(i_{j+1} + r_{j+1} / dN) - floor(i_j + r_j / dN) (8)
347: ;
348: ;
349: ; where i_j, i_{j+1} are integers and r_j < dN, r_{j+1} < dN.
350: ;
351: ;
352: ; Rewriting (7) again:
353: ;
354: ; c_j = floor(i_j + r_j / dN + dM / dN) - floor(i_j + r_j / dN)
355: ;
356: ;
357: ;
358: ; = floor((r_j + dM) / dN) - floor(r_j / dN)
359: ;
360: ;
361: ; This may be rewritten as
362: ;
363: ; c_j = dI + floor((r_j + dR) / dN) - floor(r_j / dN)
364: ;
365: ;
366: ; where dI + dR / dN = dM / dN, dI is an integer and dR < dN.
367: ;
368: ; r_j is the remainder (or "error") term in the DDA loop: r_j / dN
369: ;
370: ; is the exact fraction of a pixel at which the strip ends. To go
371: ; on to the next scan and compute c_{j+1} we need to know r_{j+1}.
372: ;
373: ;
374: ; So in the main loop of the DDA:
375: ;
376: ; c_j = dI + floor((r_j + dR) / dN) and r_{j+1} = (r_j + dR) % dN
377: ;
378: ;
379: ; and we know r_j < dN, r_{j+1} < dN, and dR < dN.
380: ;
381: ;
382: ; We have derived the DDA only for lines in the first octant; to
383: ; handle other octants we do the common trick of flipping the line
384: ; to the first octant by first making the line left-to-right by
385: ; exchanging the end-points, then flipping about the lines y = 0 and
386: ; y = x, as necessary. We must record the transformation so we can
387: ; undo them later.
388: ;
389: ; We must also be careful of how the flips affect our rounding. If
390: ; to get the line to the first octant we flipped about y = 0, we now
391: ; have to be careful to round a y value of 1/2 up instead of down as
392: ; we would for a line originally in the first octant (recall that
393: ; "In the case where two pels are equidistant, the upper or left
394: ; pel is illuminated...").
395: ;
396: ; To account for this rounding when running the DDA, we shift the line
397: ; (or not) in the y direction by the smallest amount possible. That
398: ; takes care of rounding for the DDA, but we still have to be careful
399: ; about the rounding when determining the first and last pixels to be
400: ; lit in the line.
401: ;
402: ; Determining The First And Last Pixels In The Line
403: ; -------------------------------------------------
404: ;
405: ; Fractional coordinates also make it harder to determine which pixels
406: ; will be the first and last ones in the line. We've already taken
407: ; the fractional coordinates into account in calculating the DDA, but
408: ; the DDA cannot tell us which are the end pixels because it is quite
409: ; happy to calculate pixels on the line from minus infinity to positive
410: ; infinity.
411: ;
412: ; The diamond rule determines the start and end pixels. (Recall that
413: ; the sides are exclusive except for the left and top vertices.)
414: ; This convention can be thought of in another way: there are diamonds
415: ; around the pixels, and wherever the true line crosses a diamond,
416: ; that pel is illuminated.
417: ;
418: ; Consider a line where we've done the flips to the first octant, and the
419: ; floor of the start coordinates is the origin:
420: ;
421: ; +-----------------------> +x
422: ; |
423: ; | 0 1
424: ; | 0123456789abcdef
425: ; |
426: ; | 0 00000000?1111111
427: ; | 1 00000000 1111111
428: ; | 2 0000000 111111
429: ; | 3 000000 11111
430: ; | 4 00000 ** 1111
431: ; | 5 0000 ****1
432: ; | 6 000 1***
433: ; | 7 00 1 ****
434: ; | 8 ? ***
435: ; | 9 22 3 ****
436: ; | a 222 33 ***
437: ; | b 2222 333 ****
438: ; | c 22222 3333 **
439: ; | d 222222 33333
440: ; | e 2222222 333333
441: ; | f 22222222 3333333
442: ; |
443: ; | 2 3
444: ; v
445: ; +y
446: ;
447: ; If the start of the line lands on the diamond around pixel 0 (shown by
448: ; the '0' region here), pixel 0 is the first pel in the line. The same
449: ; is true for the other pels.
450: ;
451: ; A little more work has to be done if the line starts in the
452: ; 'nether-land' between the diamonds (as illustrated by the '*' line):
453: ; the first pel lit is the first diamond crossed by the line (pixel 1 in
454: ; our example). This calculation is determined by the DDA or slope of
455: ; the line.
456: ;
457: ; If the line starts exactly half way between two adjacent pixels
458: ; (denoted here by the '?' spots), the first pixel is determined by our
459: ; round-down convention (and is dependent on the flips done to
460: ; normalize the line).
461: ;
462: ; Last Pel Exclusive
463: ; ------------------
464: ;
465: ; To eliminate repeatedly lit pels between continuous connected lines,
466: ; we employ a last-pel exclusive convention: if the line ends exactly on
467: ; the diamond around a pel, that pel is not lit. (This eliminates the
468: ; checks we had in the old code to see if we were re-lighting pels.)
469: ;
470: ; The Half Flip
471: ; -------------
472: ;
473: ; To make our run length algorithm more efficient, we employ a "half
474: ; flip". If after normalizing to the first octant, the slope is more
475: ; than 1/2, we subtract the y coordinate from the x coordinate. This
476: ; has the effect of reflecting the coordinates through the line of slope
477: ; 1/2. Note that the diagonal gets mapped into the x-axis after a half
478: ; flip.
479: ;
480: ; How Many Bits Do We Need, Anyway?
481: ; ---------------------------------
482: ;
483: ; Note that if the line is visible on your screen, you must light up
484: ; exactly the correct pixels, no matter where in the 28.4 x 28.4 device
485: ; space the end points of the line lie (meaning you must handle 32 bit
486: ; DDAs, though you can certainly have optimized cases for lesser DDAs).
487: ;
488: ; We move the origin to (floor(M0 / F), floor(N0 / F)), so when we
489: ; calculate gamma from (5), we know that 0 <= M0, N0 < F. And we
490: ; are in the first octant, so dM >= dN. Then we know that gamma can
491: ; be in the range [(-1/2)dM, (3/2)dM]. The DDI guarantees us that
492: ; valid lines will have dM and dN values at most 31 bits (unsigned)
493: ; of significance. So gamma requires 33 bits of significance (we store
494: ; this as a 64 bit number for convenience).
495: ;
496: ; When running through the DDA loop, r_j + dR can have a value in the
497: ;
498: ; range 0 <= r_j + dR < 2 dN; thus the result must be a 32 bit unsigned value.
499: ;
500: ;
501: ; Testing Lines
502: ; -------------
503: ;
504: ; To be NT compliant, a display driver must exactly adhere to GIQ,
505: ; which means that for any given line, the driver must light exactly
506: ; the same pels as does GDI. This can be tested using the Guiman tool
507: ; provided elsewhere in the DDK, and 'ZTest', which draws random lines
508: ; on the screen and to a bitmap, and compares the results.
509: ;
510: ; If You've Got Line Hardware
511: ; ---------------------------
512: ;
513: ; If your hardware already adheres to GIQ, you're all set. Otherwise
514: ; you'll want to look at the S3 sample code and read the following:
515: ;
516: ; 1) You'll want to special case integer-only lines, since they require
517: ; less processing time and are more common (CAD programs will probably
518: ; only ever give integer lines). GDI does not provide a flag saying
519: ; that all lines in a path are integer lines; consequently, you will
520: ; have to explicitly check every line.
521: ;
522: ; 2) You are required to correctly draw any line in the 28.4 device
523: ; space that intersects the viewport. If you have less than 32 bits
524: ; of significance in the hardware for the Bresenham terms, extremely
525: ; long lines would overflow the hardware. For such (rare) cases, you
526: ; can fall back to strip-drawing code, of which there is a C version in
527: ; the S3's lines.cxx (or if your display is a frame buffer, fall back
528: ; to the engine).
529: ;
530: ; 3) If you can explicitly set the Bresenham terms in your hardware, you
531: ; can draw non-integer lines using the hardware. If your hardware has
532: ; 'n' bits of precision, you can draw GIQ lines that are up to 2^(n-5)
533: ; pels long (4 bits are required for the fractional part, and one bit is
534: ; used as a sign bit). Note that integer lines don't require the 4
535: ; fractional bits, so if you special case them as in 1), you can do
536: ; integer lines that are up to 2^(n - 1) pels long. See the S3's
537: ; fastline.asm for an example.
538: ;
539: ;-----------------------------------------------------------------------;
540:
541: cProc bLines,36,< \
542: uses esi edi ebx, \
543: pdsurf: ptr, \
544: pptfxFirst: ptr, \
545: pptfxBuf: ptr, \
546: prun: ptr, \
547: cptfx: dword, \
548: pls: ptr, \
549: prclClip: ptr, \
550: apfn: ptr, \
551: flStart: dword >
552:
553: ; pdsurf: Surface data
554: ; pptfxFirst: Start point of first line
555: ; pptfxBuf: All subsequent points
556: ; prun: Array of runs if doing complex clipping
557: ; cptfx: Number of points in pptfxBuf (i.e., # lines)
558: ; pls: Line state
559: ; prclClip: Clip rectangle if doing simple clipping
560: ; apfn: Pointer to table of strip drawers
561: ; flStart: Flags for all lines
562:
563: local cPelsAfterThisBank: dword ; For bank switching
564: local cStripsInNextRun: dword ; For bank switching
565: local pptfxBufEnd: ptr ; Last point in pptfxBuf
566: local M0: dword ; Normalized x0 in device coords
567: local dM: dword ; Delta-x in device coords
568: local N0: dword ; Normalized y0 in device coords
569: local dN: dword ; Delta-y in device coords
570: local fl: dword ; Flags for current line
571: local x: dword ; Normalized start pixel x-coord
572: local y: dword ; Normalized start pixel y-coord
573: local eqGamma_lo: dword ; Lower 32 bits of Gamma
574: local eqGamma_hi: dword ; Upper 32 bits of Gamma
575: local x0: dword ; Start pixel x-offset
576: local y0: dword ; Start pixel y-offset
577: local ulSlopeOneAdjustment: dword ; Special offset if line of slope 1
578: local cStylePels: dword ; # of pixels in line (before clip)
579: local xStart: dword ; Start pixel x-offset before clip
580: local pfn: ptr ; Pointer to strip drawing function
581: local cPels: dword ; # pixels to be drawn (after clip)
582: local i: dword ; # pixels in strip
583: local r: dword ; Remainder (or "error") term
584: local d_I: dword ; Delta-I
585: local d_R: dword ; Delta-R
586: local plStripEnd: ptr ; Last strip in buffer
587: local ptlStart[size POINTL]: byte ; Unnormalized start coord
588: local dN_Original: dword ; dN before half-flip
589: local xClipLeft: dword ; Left side of clip rectangle
590: local xClipRight: dword ; Right side of clip rectangle
591: local strip[size STRIPS]: byte ; Our strip buffer
592:
593: ; Do some initializing:
594:
595: mov ecx, cptfx
596: mov edx, pptfxBuf
597: lea eax, [edx + ecx * (size POINTL) - (size POINTL)]
598: mov pptfxBufEnd, eax ; pptfxBufEnd is inclusive of end point
599:
600: mov eax, [edx].ptl_x ; Load up end point (M1, N1)
601: mov edi, [edx].ptl_y
602:
603: mov edx, pptfxFirst ; Load up start point (M0, N0)
604: mov esi, [edx].ptl_x
605: mov ecx, [edx].ptl_y
606:
607: mov ebx, flStart
608:
609: ;-----------------------------------------------------------------------;
610: ; Flip to the first octant. ;
611: ;-----------------------------------------------------------------------;
612:
613: ; Register state: esi = M0
614: ; ecx = N0
615: ; eax = dM (M1)
616: ; edi = dN (N1)
617: ; ebx = fl
618:
619: ; Make sure we go left to right:
620:
621: the_main_loop:
622: cmp esi, eax
623: jle short is_left_to_right ; skip if M0 <= M1
624: xchg esi, eax ; swap M0, M1
625: xchg ecx, edi ; swap N0, N1
626: or ebx, FL_FLIP_H
627:
628: is_left_to_right:
629:
630: ; Compute the deltas, remembering that the DDI says we should get
631: ; deltas less than 2^31. If we get more, we ensure we don't crash
632: ; later on by simply skipping the line:
633:
634: sub eax, esi ; eax = dM
635: jo next_line ; dM must be less than 2^31
636: sub edi, ecx ; edi = dN
637: jo next_line ; dN must be less than 2^31
638:
639: jge short is_top_to_bottom ; skip if dN >= 0
640: neg ecx ; N0 = -N0
641: neg edi ; N1 = -N1
642: or ebx, FL_FLIP_V
643:
644: is_top_to_bottom:
645: cmp edi, eax
646: jb short done_flips ; skip if dN < dM
647: jne short slope_more_than_one
648:
649: ; We must special case slopes of one (because of our rounding convention):
650:
651: or ebx, FL_FLIP_SLOPE_ONE
652: jmp short done_flips
653:
654: slope_more_than_one:
655: xchg eax, edi ; swap dM, dN
656: xchg esi, ecx ; swap M0, N0
657: or ebx, FL_FLIP_D
658:
659: done_flips:
660:
661: mov edx, ebx
662: and edx, FL_ROUND_MASK
663: .errnz FL_ROUND_SHIFT - 2
664: or ebx, [gaflRoundTable + edx] ; get our rounding flags
665:
666: mov dM, eax ; save some info
667: mov dN, edi
668: mov fl, ebx
669:
670: ; We're going to shift our origin so that it's at the closest integer
671: ; coordinate to the left/above our fractional start point (it makes
672: ; the math quicker):
673:
674: mov edx, esi ; x = LFLOOR(M0)
675: sar edx, FLOG2
676: mov x, edx
677:
678: mov edx, ecx ; y = LFLOOR(N0)
679: sar edx, FLOG2
680: mov y, edx
681:
682: ;-----------------------------------------------------------------------;
683: ; Compute the fractional remainder term ;
684: ;-----------------------------------------------------------------------;
685:
686: ; By shifting the origin we've contrived to eliminate the integer
687: ; portion of our fractional start point, giving us start point
688: ; fractional coordinates in the range [0, F - 1]:
689:
690: and esi, F - 1 ; M0 = FXFRAC(M0)
691: and ecx, F - 1 ; N0 = FXFRAC(N0)
692:
693: ; We now compute Gamma:
694:
695: mov M0, esi ; save M0, N0 for later
696: mov N0, ecx
697:
698: lea edx, [ecx + F/2]
699: mul edx ; [edx:eax] = dM * (N0 + F/2)
700: xchg eax, edi
701: mov ecx, edx ; [ecx:edi] = dM * (N0 + F/2)
702: ; (we just nuked N0)
703:
704: mul esi ; [edx:eax] = dN * M0
705:
706: ; Now gamma = dM * (N0 + F/2) - dN * M0 - bRoundDown
707:
708: .errnz FL_V_ROUND_DOWN - 8000h
709: ror bh, 8
710: sbb edi, eax
711: sbb ecx, edx
712:
713: shrd edi, ecx, FLOG2
714: sar ecx, FLOG2 ; gamma = [ecx:edi] >>= 4
715:
716: mov eqGamma_hi, ecx
717: mov eqGamma_lo, edi
718:
719: mov eax, N0
720:
721: ; Register state:
722: ; eax = N0
723: ; ebx = fl
724: ; ecx = eqGamma_hi
725: ; edx = garbage
726: ; esi = M0
727: ; edi = eqGamma_lo
728:
729: testb ebx, FL_FLIP_H
730: jnz line_runs_right_to_left
731:
732: ;-----------------------------------------------------------------------;
733: ; Figure out which pixels are at the ends of a left-to-right line. ;
734: ; --------> ;
735: ;-----------------------------------------------------------------------;
736:
737: public line_runs_left_to_right
738: line_runs_left_to_right:
739: or esi, esi
740: jz short LtoR_check_slope_one
741: ; skip ahead if M0 == 0
742: ; (in that case, x0 = 0 which is to be
743: ; kept in esi, and is already
744: ; conveniently zero)
745:
746: or eax, eax
747: jnz short LtoR_N0_not_zero
748:
749: .errnz FL_H_ROUND_DOWN - 80h
750: ror bl, 8
751: sbb esi, -F/2
752: shr esi, FLOG2
753: jmp short LtoR_check_slope_one
754: ; esi = x0 = rounded M0
755:
756: LtoR_N0_not_zero:
757: sub eax, F/2
758: sbb edx, edx
759: xor eax, edx
760: sub eax, edx
761: cmp esi, eax
762: sbb esi, esi
763: inc esi ; esi = x0 = (abs(N0 - F/2) <= M0)
764:
765: public LtoR_check_slope_one
766: LtoR_check_slope_one:
767: mov ulSlopeOneAdjustment, 0
768: mov eax, ebx
769: and eax, FL_FLIP_SLOPE_ONE + FL_H_ROUND_DOWN
770: cmp eax, FL_FLIP_SLOPE_ONE + FL_H_ROUND_DOWN
771: jne short LtoR_compute_y0_from_x0
772:
773: ; We have to special case lines that are exactly of slope 1 or -1:
774:
775: mov eax, N0
776: add eax, dN
777: and eax, F - 1 ; eax = N1
778: jz short LtoR_slope_one_check_start_point
779:
780: mov edx, M0
781: add edx, dM
782: and edx, F - 1 ; edx = M1
783:
784: add eax, F/2
785: cmp edx, eax ; cmp M1, N1 + F/2
786: jne short LtoR_slope_one_check_start_point
787: mov ulSlopeOneAdjustment, -1
788:
789: LtoR_slope_one_check_start_point:
790: mov eax, M0
791: or eax, eax
792: jz short LtoR_compute_y0_from_x0
793:
794: add eax, F/2
795: cmp eax, N0 ; cmp M0 + 8, N0
796: jne short LtoR_compute_y0_from_x0
797:
798: xor esi, esi ; x0 = 0
799:
800: LtoR_compute_y0_from_x0:
801:
802: ; ecx = eqGamma_hi
803: ; esi = x0
804: ; edi = eqGamma_lo
805:
806: mov eax, dN
807: mov edx, dM
808:
809: mov x0, esi
810: mov y0, 0
811: cmp ecx, 0
812: jl short LtoR_compute_x1
813:
814: neg esi
815: and esi, eax
816: sub edx, esi
817: cmp edi, edx
818: mov edx, dM
819: jl short LtoR_compute_x1
820: mov y0, 1 ; y0 = floor((dN * x0 + eqGamma) / dM)
821:
822: LtoR_compute_x1:
823:
824: ; Register state:
825: ; eax = dN
826: ; ebx = fl
827: ; ecx = garbage
828: ; edx = dM
829: ; esi = garbage
830: ; edi = garbage
831:
832: mov esi, M0
833: add esi, edx
834: mov ecx, esi
835: shr esi, FLOG2
836: dec esi ; x1 = ((M0 + dM) >> 4) - 1
837: add esi, ulSlopeOneAdjustment
838: and ecx, F-1 ; M1 = (M0 + dM) & 15
839: jz done_first_pel_last_pel
840:
841: add eax, N0
842: and eax, F-1 ; N1 = (N0 + dN) & 15
843: jnz short LtoR_N1_not_zero
844:
845: .errnz FL_H_ROUND_DOWN - 80h
846: ror bl, 8
847: sbb ecx, -F/2
848: shr ecx, FLOG2 ; ecx = LROUND(M1, fl & FL_ROUND_DOWN)
849: add esi, ecx
850: jmp done_first_pel_last_pel
851:
852: LtoR_N1_not_zero:
853: sub eax, F/2
854: sbb edx, edx
855: xor eax, edx
856: sub eax, edx
857: cmp eax, ecx
858: jg done_first_pel_last_pel
859: inc esi
860: jmp done_first_pel_last_pel
861:
862: ;-----------------------------------------------------------------------;
863: ; Figure out which pixels are at the ends of a right-to-left line. ;
864: ; <-------- ;
865: ;-----------------------------------------------------------------------;
866:
867: ; Compute x0:
868:
869: public line_runs_right_to_left
870: line_runs_right_to_left:
871: mov x0, 1 ; x0 = 1 (default; may be raised to 2 below)
872: or eax, eax ; set ZF from eax (N0, per the ABS(N0 - F/2) comment below)
873: jnz short RtoL_N0_not_zero
874:
875: xor edx, edx ; ulDelta = 0
876: .errnz FL_H_ROUND_DOWN - 80h
877: ror bl, 8 ; bl is unchanged; CF = bit 7 of bl = FL_H_ROUND_DOWN
878: sbb esi, -F/2 ; esi = M0 + F/2 - CF
879: shr esi, FLOG2 ; esi = LROUND(M0, fl & FL_H_ROUND_DOWN)
880: jz short RtoL_check_slope_one ; rounded to 0: keep x0 = 1, ulDelta = 0
881:
882: mov x0, 2 ; x0 = 2
883: mov edx, dN ; ulDelta = dN
884: jmp short RtoL_check_slope_one
885:
886: RtoL_N0_not_zero:
887: sub eax, F/2 ; CF set when N0 < F/2
888: sbb edx, edx ; edx = -1 if there was a borrow, else 0
889: xor eax, edx
890: sub eax, edx ; eax = ABS(N0 - F/2)
891: add eax, esi ; eax = ABS(N0 - F/2) + M0
892: xor edx, edx ; ulDelta = 0
893: cmp eax, F
894: jle short RtoL_check_slope_one ; sum <= F: keep x0 = 1, ulDelta = 0
895:
896: mov x0, 2 ; x0 = 2
897: mov edx, dN ; ulDelta = dN
898:
899: public RtoL_check_slope_one
900: RtoL_check_slope_one:
901: mov ulSlopeOneAdjustment, 0 ; default: no adjustment added to x1 later
902: mov eax, ebx
903: and eax, FL_FLIP_SLOPE_ONE + FL_H_ROUND_DOWN
904: cmp eax, FL_FLIP_SLOPE_ONE ; need FL_FLIP_SLOPE_ONE set and FL_H_ROUND_DOWN clear
905: jne short RtoL_compute_y0_from_x0
906:
907: ; We have to special case lines that are exactly of slope 1 or -1:
908:
909: mov eax, N0
910: add eax, dN
911: and eax, F - 1 ; eax = N1
912: jz short RtoL_slope_one_check_start_point ; N1 == 0: no end-point adjustment
913:
914: mov esi, M0
915: add esi, dM
916: and esi, F - 1 ; esi = M1
917:
918: add eax, F/2
919: cmp esi, eax ; cmp M1, N1 + F/2
920: jne short RtoL_slope_one_check_start_point
921: mov ulSlopeOneAdjustment, 1 ; M1 == N1 + F/2: x1 gets one extra pel
922:
923: RtoL_slope_one_check_start_point:
924: mov eax, M0
925: or eax, eax
926: jz short RtoL_compute_y0_from_x0 ; M0 == 0: leave x0 and ulDelta alone
927:
928: add eax, F/2
929: cmp eax, N0 ; cmp M0 + 8, N0
930: jne short RtoL_compute_y0_from_x0
931:
932: mov x0, 2 ; x0 = 2
933: mov edx, dN ; ulDelta = dN
934:
935: RtoL_compute_y0_from_x0:
936:
937: ; eax = garbage
938: ; ebx = fl
939: ; ecx = eqGamma_hi
940: ; edx = ulDelta
941: ; esi = garbage
942: ; edi = eqGamma_lo
943:
944: mov eax, dN ; eax = dN
945: mov y0, 0 ; y0 = 0
946:
947: add edi, edx
948: adc ecx, 0 ; eqGamma += ulDelta
949: ; NOTE: Setting flags here!
950: mov edx, dM ; edx = dM (mov leaves the flags from adc intact)
951: jl short RtoL_compute_x1 ; NOTE: Looking at the flags here! eqGamma_hi < 0: y0 stays 0
952: jg short RtoL_y0_is_2 ; eqGamma_hi > 0: y0 = 2
953:
954: lea ecx, [edx + edx] ; eqGamma_hi == 0 here, so only eqGamma_lo (edi) matters
955: sub ecx, eax ; ecx = 2 * dM - dN
956: cmp edi, ecx
957: jge short RtoL_y0_is_2 ; eqGamma_lo >= 2 * dM - dN: y0 = 2
958:
959: sub ecx, edx ; ecx = dM - dN
960: cmp edi, ecx
961: jl short RtoL_compute_x1 ; eqGamma_lo < dM - dN: y0 stays 0
962:
963: mov y0, 1 ; otherwise y0 = 1
964: jmp short RtoL_compute_x1
965:
966: RtoL_y0_is_2:
967: mov y0, 2
968:
969: RtoL_compute_x1:
970:
971: ; Register state:
972: ; eax = dN
973: ; ebx = fl
974: ; ecx = garbage
975: ; edx = dM
976: ; esi = garbage
977: ; edi = garbage
978:
979: mov esi, M0
980: add esi, edx
981: mov ecx, esi
982: shr esi, FLOG2 ; x1 = (M0 + dM) >> 4
983: add esi, ulSlopeOneAdjustment ; 0 or 1, set in RtoL_check_slope_one
984: and ecx, F-1 ; M1 = (M0 + dM) & 15
985:
986: add eax, N0
987: and eax, F-1 ; N1 = (N0 + dN) & 15
988: jnz short RtoL_N1_not_zero
989:
990: .errnz FL_H_ROUND_DOWN - 80h
991: ror bl, 8 ; bl is unchanged; CF = bit 7 of bl = FL_H_ROUND_DOWN
992: sbb ecx, -F/2 ; ecx = M1 + F/2 - CF
993: shr ecx, FLOG2 ; ecx = LROUND(M1, fl & FL_H_ROUND_DOWN)
994: add esi, ecx ; x1 += rounded fractional part
995: jmp done_first_pel_last_pel
996:
997: RtoL_N1_not_zero:
998: sub eax, F/2 ; CF set when N1 < F/2
999: sbb edx, edx ; edx = -1 if there was a borrow, else 0
1000: xor eax, edx
1001: sub eax, edx ; eax = ABS(N1 - F/2)
1002: add eax, ecx ; eax = ABS(N1 - F/2) + M1
1003: cmp eax, F+1 ; CF = 1 iff eax <= F
1004: sbb esi, -1 ; x1 += 1 - CF, i.e. bump x1 only when eax > F
1005:
1006: done_first_pel_last_pel:
1007:
1008: ; Register state:
1009: ; eax = garbage
1010: ; ebx = fl
1011: ; ecx = garbage
1012: ; edx = garbage
1013: ; esi = x1
1014: ; edi = garbage
1015:
1016: mov ecx, x0
1017: lea edx, [esi + 1]
1018: sub edx, ecx ; edx = x1 - x0 + 1
1019:
1020: jle next_line ; no pels between x0 and x1: nothing to draw
1021: mov cStylePels, edx ; pel count before any clipping (used for style state)
1022: mov xStart, ecx ; x0 before any clipping
1023:
1024: ;-----------------------------------------------------------------------;
1025: ; See if clipping or styling needs to be done. ;
1026: ;-----------------------------------------------------------------------;
1027:
1028: testb ebx, FL_CLIP ; testb is a macro defined outside this chunk
1029: jnz do_some_clipping ; clipping code rejoins at done_clipping
1030:
1031: ; Register state:
1032: ; eax = garbage
1033: ; ebx = fl
1034: ; ecx = x0 (stack variable correct too)
1035: ; edx = garbage
1036: ; esi = x1
1037: ; edi = garbage
1038:
1039: done_clipping:
1040: mov eax, y0
1041:
1042: sub esi, ecx
1043: inc esi ; esi = cPels = x1 - x0 + 1
1044: mov cPels, esi
1045:
1046: mov esi, pdsurf
1047: add ecx, x ; ecx = ptlStart.ptl_x
1048: add eax, y ; eax = ptlStart.ptl_y
1049:
1050: mov esi, [esi].dsurf_lNextScan ; we'll compute the sign of lNextScan
1051:
1052: testb ebx, FL_FLIP_D
1053: jz short do_v_unflip
1054: xchg ecx, eax ; FL_FLIP_D set: swap the x and y start coordinates back
1055:
1056: do_v_unflip:
1057: testb ebx, FL_FLIP_V
1058: jz short done_unflips
1059: neg eax ; FL_FLIP_V set: negate y back
1060: neg esi ; ...and step through scans in the opposite direction
1061:
1062: done_unflips:
1063: mov strip.ST_lNextScan, esi ; lNextScan now right for y-direction
1064: testb ebx, FL_STYLED
1065: jnz do_some_styling ; styling code rejoins at done_styling
1066:
1067: done_styling:
1068: lea edx, [strip.ST_alStrips + (STRIP_MAX * 4)]
1069: mov plStripEnd, edx ; default: all STRIP_MAX dword entries are usable
1070:
1071: mov cPelsAfterThisBank, 0 ; defaults used when no bank switch is needed
1072: mov cStripsInNextRun, 7fffffffh
1073:
1074: testb ebx, FL_PHYSICAL_DEVICE
1075: jz done_bank_setup ; FL_PHYSICAL_DEVICE clear: skip the banking setup
1076:
1077: ;-----------------------------------------------------------------------;
1078: ; Do banking setup. ;
1079: ;-----------------------------------------------------------------------;
1080:
1081: public bank_setup
1082: bank_setup:
1083:
1084: ; Register state:
1085: ; eax = ptlStart.ptl_y
1086: ; ebx = fl
1087: ; ecx = ptlStart.ptl_x
1088: ; edx = garbage
1089: ; esi = garbage
1090: ; edi = garbage
1091:
1092: mov esi, pdsurf
1093: cmp eax, [esi].dsurf_rcl1WindowClip.yTop
1094: jl short bank_get_initial_bank ; ptlStart.y < rcl1WindowClip.yTop
1095:
1096: cmp eax, [esi].dsurf_rcl1WindowClip.yBottom
1097: jl short bank_got_initial_bank ; ptlStart.y < rcl1WindowClip.yBot
1098:
1099: bank_get_initial_bank:
1100: mov ptlStart.ptl_y, eax ; Save ptlStart.ptl_y
1101: mov edi, ecx ; Save ptlStart.ptl_x
1102:
1103: .errnz JustifyTop
1104: .errnz JustifyBottom - 1
1105: .errnz FL_FLIP_V - 8
1106:
1107: mov ecx, ebx ; JustifyTop if line goes down,
1108: shr ecx, 3 ; JustifyBottom if line goes up
1109: and ecx, 1 ; ecx = the FL_FLIP_V bit of fl (0 or 1)
1110:
1111: bank_justified:
1112: ptrCall <dword ptr [esi].dsurf_pfnBankControl>, \
1113: <esi, eax, ecx>
1114:
1115: mov eax, ptlStart.ptl_y ; restore ptlStart.ptl_y after the call
1116: mov ecx, edi ; restore ptlStart.ptl_x after the call
1117:
1118: bank_got_initial_bank:
1119: testb ebx, FL_FLIP_D
1120: jz short bank_major_x
1121:
1122: bank_major_y:
1123: testb ebx, FL_FLIP_V
1124: jz short bank_major_y_down
1125: bank_major_y_up:
1126: lea edi, [eax + 1]
1127: sub edi, [esi].dsurf_rcl1WindowClip.yTop ; edi = ptlStart.y + 1 - yTop
1128: jmp short bank_done_y_major
1129: bank_major_y_down:
1130: mov edi, [esi].dsurf_rcl1WindowClip.yBottom
1131: sub edi, eax ; edi = yBottom - ptlStart.y
1132: bank_done_y_major:
1133: mov esi, cPels
1134: sub esi, edi ; edi = cPelsInBank
1135: mov cPelsAfterThisBank, esi ; cPelsAfterThisBank = cPels - cPelsInBank
1136: jle short done_bank_setup ; whole line fits in this bank
1137: mov cPels, edi ; otherwise draw only cPelsInBank pels for now
1138: jmp short done_bank_setup
1139:
1140: bank_major_x:
1141: mov edi, dN
1142: shr edi, FLOG2
1143: add edi, y
1144:
1145: ; We're guessing at the y-position of the end pixel (it's too much work
1146: ; to compute the actual value) to see if the line spans more than one
1147: ; bank. We have to add at least a slop value of '3' because the actual
1148: ; start pixel may be up to 2 off from 'y' because of end-pixel exclusiveness,
1149: ; and we have to add 1 more because we're taking the floor of (dN / F), to
1150: ; account for rounding:
1151:
1152: add edi, 3 ; yEnd = edi = y + LFLOOR(dN) + 3
1153: testb ebx, FL_FLIP_V
1154: jz short bank_major_x_down
1155: bank_major_x_up:
1156: mov edx, 1
1157: sub edx, [esi].dsurf_rcl1WindowClip.yTop ; edx = -yNextBankStart
1158:
1159: cmp edi, edx
1160: lea edx, [edx + eax] ; edx = cStripsInNextRun
1161: jl short bank_major_x_done ; (lea leaves the flags from cmp intact)
1162:
1163: ; Line may go over bank boundary, so don't do a half flip:
1164:
1165: or ebx, FL_DONT_DO_HALF_FLIP
1166: jmp short bank_major_x_done
1167:
1168: bank_major_x_down:
1169: mov esi, [esi].dsurf_rcl1WindowClip.yBottom ; esi = yNextBankStart
1170:
1171: mov edx, esi
1172: sub edx, eax ; edx = cStripsInNextRun
1173:
1174: cmp edi, esi
1175: jl short bank_major_x_done ; estimated yEnd is inside this bank
1176: or ebx, FL_DONT_DO_HALF_FLIP ; line may go over bank boundary
1177:
1178: bank_major_x_done:
1179: sub edx, STRIP_MAX
1180: mov cStripsInNextRun, edx ; strips left in this run after one full buffer
1181: jge short done_bank_setup ; at least STRIP_MAX strips fit: keep full buffer
1182:
1183: lea edx, [strip.ST_alStrips + edx * 4 + (STRIP_MAX * 4)]
1184: mov plStripEnd, edx ; edx was negative: end the buffer early
1185:
1186: done_bank_setup:
1187:
1188: ;-----------------------------------------------------------------------;
1189: ; Setup to do DDA. ;
1190: ;-----------------------------------------------------------------------;
1191:
1192: ; Register state:
1193: ; eax = ptlStart.ptl_y
1194: ; ebx = fl
1195: ; ecx = ptlStart.ptl_x
1196: ; edx = garbage
1197: ; esi = garbage
1198: ; edi = garbage
1199:
1200: mov edx, 80h
1201: ror dl, cl
1202: mov strip.ST_jBitMask, dl ; ST_jBitMask =
1203: ; (0x80 >> (ptlStart.ptl_x & 0x7))
1204:
1205: mov esi, pdsurf
1206: mov edi, eax ; Now edi = ptlStart.ptl_y
1207: imul [esi].dsurf_lNextScan ; one-operand imul: edx:eax = eax * lNextScan
1208: add eax, [esi].dsurf_pvBitmapStart
1209: sar ecx, 3
1210: add eax, ecx
1211: mov strip.ST_pjScreen, eax ; ST_pjScreen = pchBits + ptlStart.ptl_y *
1212: ; cjDelta + (ptlStart.ptl_x >> 3)
1213:
1214: mov eax, dM
1215: mov ecx, dN
1216: mov esi, eqGamma_lo
1217: mov edi, eqGamma_hi
1218:
1219: ; Register state:
1220: ; eax = dM
1221: ; ebx = fl
1222: ; ecx = dN
1223: ; edx = garbage
1224: ; esi = eqGamma_lo
1225: ; edi = eqGamma_hi
1226:
1227: lea edx, [ecx + ecx] ; if (2 * dN > dM)
1228: cmp edx, eax
1229: mov edx, y0 ; Load y0 again (mov leaves the flags from cmp intact)
1230: jbe short after_half_flip ; 2 * dN <= dM: no half flip
1231:
1232: test ebx, (FL_STYLED + FL_DONT_DO_HALF_FLIP)
1233: jnz short after_half_flip ; never half flip if either flag is set
1234:
1235: or ebx, FL_FLIP_HALF
1236: mov fl, ebx
1237:
1238: ; Do a half flip!
1239:
1240: not esi
1241: not edi
1242: add esi, eax
1243: adc edi, 0 ; eqGamma = -eqGamma - 1 + dM
1244:
1245: neg ecx
1246: add ecx, eax ; dN = dM - dN
1247:
1248: neg edx
1249: add edx, x0 ; y0 = x0 - y0
1250:
1251: after_half_flip:
1252: mov strip.ST_flFlips, ebx
1253: and ebx, FL_STRIP_MASK
1254:
1255: .errnz FL_STRIP_SHIFT
1256: mov eax, apfn
1257: lea eax, [eax + ebx * 4]
1258: mov eax, [eax]
1259: mov pfn, eax ; pfn = apfn[fl & FL_STRIP_MASK] (dword table)
1260: mov eax, dM
1261:
1262: ; Register state:
1263: ; eax = dM
1264: ; ebx = garbage
1265: ; ecx = dN
1266: ; edx = y0
1267: ; esi = eqGamma_lo
1268: ; edi = eqGamma_hi
1269:
1270: or ecx, ecx
1271: jz short zero_slope ; dN == 0: skip the divides below
1272:
1273: compute_dda_stuff:
1274: inc edx
1275: mul edx ; edx:eax = (y0 + 1) * dM
1276: stc ; set the carry to accomplish -1
1277: sbb eax, esi
1278: sbb edx, edi ; (y0 + 1) * dM - eqGamma - 1
1279: div ecx ; eax = quotient, edx = remainder (divide by dN)
1280:
1281: mov esi, eax ; esi = i
1282: mov edi, edx ; edi = r
1283:
1284: xor edx, edx
1285: mov eax, dM
1286: div ecx ; edx = d_R, eax = d_I
1287: mov d_I, eax
1288:
1289: sub esi, x0
1290: inc esi ; esi = i - x0 + 1 = length of the first strip
1291:
1292: done_dda_stuff:
1293: lea eax, [strip.ST_alStrips]
1294: mov ebx, cPels
1295:
1296: ;-----------------------------------------------------------------------;
1297: ; Do our main DDA loop. ;
1298: ;-----------------------------------------------------------------------;
1299:
1300: sub edi, ecx ; offset remainder term from [0..dN)
1301: ; to [-dN..0) so test in inner
1302: ; loop is quicker
1303: align 4
1304:
1305: ; Register state:
1306: ; eax = plStrip ; current pointer into strip array
1307: ; ebx = cPels ; total number of pels in line
1308: ; ecx = dN ; delta-N = rise in line
1309: ; edx = d_R ; d_I + d_R/dN = exact strip length
1310: ; esi = i ; length of current strip
1311: ; edi = r ; remainder term for current strip
1312: ; ; in range [-dN..0)
1313:
1314: public dda_loop
1315: dda_loop:
1316: sub ebx, esi ; subtract strip length from line length
1317: jle final_strip ; if <= 0, this strip finishes the line
1318:
1319: mov [eax], esi ; write strip length to strip array
1320: add eax, 4
1321: cmp plStripEnd, eax ; is the strip array buffer full?
1322: jbe short output_strips ; if so, empty it
1323:
1324: ; The output_strips routine jumps to here when done:
1325:
1326: done_output_strips:
1327: mov esi, d_I ; our normal strip length
1328: add edi, edx ; adjust our remainder term
1329: jl short dda_loop ; remainder still negative: strip length is d_I
1330:
1331: sub edi, ecx ; our remainder became 1 or more, so
1332: inc esi ; we increment this strip length
1333: ; and adjust the remainder term
1334:
1335: ; We've unrolled our loop a bit, so this should look familiar to the above:
1336:
1337: sub ebx, esi ; subtract strip length from line length
1338: jle final_strip ; if <= 0, this strip finishes the line
1339:
1340: mov [eax], esi ; write strip length to strip array
1341: add eax, 4 ; adjust strip pointer
1342:
1343: ; Note that banking requires us to check if the strip array is full here
1344: ; too (and note that if output_strips is called it will return to
1345: ; done_output_strips):
1346:
1347: cmp plStripEnd, eax
1348: jbe short output_strips
1349:
1350: mov esi, d_I ; our normal strip length
1351: add edi, edx ; adjust our remainder term
1352: jl short dda_loop
1353:
1354: sub edi, ecx ; our remainder became 1 or more, so
1355: inc esi ; adjust
1356: jmp short dda_loop
1357:
1358: zero_slope:
1359: mov esi, 7fffffffh ; dN == 0: one maximal strip, so dda_loop exits to final_strip at once
1360: jmp short done_dda_stuff
1361:
1362: ;-----------------------------------------------------------------------;
1363: ; Empty strips buffer & possibly do x-major bank switch. ;
1364: ;-----------------------------------------------------------------------;
1365:
1366: output_strips:
1367: mov d_R, edx ; save the DDA registers in stack variables;
1368: mov cPels, ebx ; they are reloaded at get_ready_for_more_strips
1369: mov i, esi
1370: mov r, edi
1371: mov dN, ecx
1372:
1373: lea edx, [strip]
1374: mov ecx, pls
1375:
1376: ; Call our strip routine:
1377:
1378: ptrCall <dword ptr pfn>, \
1379: <edx, ecx, eax>
1380:
1381: ; It may be that we ran out of room in our strips buffer, and don't
1382: ; actually have to switch banks. See if that's the case:
1383:
1384: mov eax, cStripsInNextRun
1385: or eax, eax
1386: jg short done_strip_bank_switch ; strips still left in this run: no switch
1387:
1388: ; We have to switch banks. See if we're going up or down:
1389:
1390: mov esi, pdsurf
1391: test fl, FL_FLIP_V
1392: jz short bank_x_down
1393:
1394: bank_x_up:
1395: mov edi, strip.ST_pjScreen
1396: sub edi, [esi].dsurf_pvBitmapStart ; edi = offset relative to the old pvBitmapStart
1397: mov ebx, [esi].dsurf_rcl1WindowClip.yTop
1398: dec ebx ; we want yTop - 1 to be mapped in
1399:
1400: ; Map in the next higher bank:
1401:
1402: ptrCall <dword ptr [esi].dsurf_pfnBankControl>, \
1403: <esi, ebx, JustifyBottom>; ebx, esi and edi are preserved
1404:
1405: lea eax, [ebx + 1] ; eax = old yTop
1406: sub eax, [esi].dsurf_rcl1WindowClip.yTop
1407: ; eax = # of scans can do in bank
1408:
1409: add edi, [esi].dsurf_pvBitmapStart ; rebase the offset on the value read after the call
1410: mov strip.ST_pjScreen, edi
1411:
1412: jmp short done_strip_bank_switch
1413:
1414: bank_x_down:
1415: mov edi, strip.ST_pjScreen
1416: sub edi, [esi].dsurf_pvBitmapStart ; edi = offset relative to the old pvBitmapStart
1417: mov ebx, [esi].dsurf_rcl1WindowClip.yBottom
1418:
1419: ; Map in the next lower bank:
1420:
1421: ptrCall <dword ptr [esi].dsurf_pfnBankControl>, \
1422: <esi, ebx, JustifyTop> ; ebx, esi and edi are preserved
1423:
1424: mov eax, [esi].dsurf_rcl1WindowClip.yBottom
1425: sub eax, ebx ; eax = # scans can do in bank
1426:
1427: add edi, [esi].dsurf_pvBitmapStart ; rebase the offset on the value read after the call
1428: mov strip.ST_pjScreen,edi
1429:
1430: done_strip_bank_switch:
1431:
1432: ; eax = cStripsInNextRun
1433:
1434: lea edx, [strip.ST_alStrips + (STRIP_MAX * 4)]
1435: sub eax, STRIP_MAX
1436: mov cStripsInNextRun, eax ; strips left in this run after one full buffer
1437: jge short get_ready_for_more_strips
1438: lea edx, [edx + eax * 4] ; eax is negative here: end the buffer early
1439:
1440: get_ready_for_more_strips:
1441: mov plStripEnd, edx
1442:
1443: mov esi, i ; reload the DDA registers saved on entry
1444: mov edi, r
1445: mov ebx, cPels
1446: mov edx, d_R
1447: mov ecx, dN
1448: lea eax, [strip.ST_alStrips] ; start refilling the strip array from the beginning
1449: jmp done_output_strips
1450:
1451: ;-----------------------------------------------------------------------;
1452: ; Empty strips buffer. Either get new line or do y-major bank switch. ;
1453: ;-----------------------------------------------------------------------;
1454:
1455: final_strip:
1456: add ebx, esi ; undo dda_loop's subtraction: ebx = pels left for this strip
1457: mov [eax], ebx ; write the truncated strip length
1458: add eax, 4
1459:
1460: cmp cPelsAfterThisBank, 0
1461: jg short bank_y_major ; more pels remain after this bank
1462:
1463: very_final_strip:
1464: lea edx, [strip]
1465: mov ecx, pls
1466:
1467: ptrCall <dword ptr pfn>, \
1468: <edx, ecx, eax>
1469:
1470: ; NOTE: next_line is jumped to from various places, and it cannot assume
1471: ; any registers are loaded.
1472:
1473: next_line:
1474: mov ebx, flStart
1475: testb ebx, FL_COMPLEX_CLIP
1476: jnz short see_if_done_complex_clipping
1477:
1478: mov edx, pptfxBuf
1479: cmp edx, pptfxBufEnd
1480: je short all_done ; reached pptfxBufEnd: no more points
1481:
1482: mov esi, [edx].ptl_x ; esi, ecx = (x, y) of the current point
1483: mov ecx, [edx].ptl_y
1484: add edx, size POINTL
1485: mov pptfxBuf, edx ; advance the buffer pointer by one point
1486: mov eax, [edx].ptl_x ; eax, edi = (x, y) of the following point
1487: mov edi, [edx].ptl_y
1488: jmp the_main_loop ; the_main_loop lies outside this chunk
1489:
1490: all_done:
1491: mov eax, 1 ; value left in eax when bLines returns
1492:
1493: cRet bLines
1494:
1495: see_if_done_complex_clipping:
1496: mov ebx, fl
1497: dec cptfx
1498: jz short all_done ; cptfx reached zero: finished
1499:
1500: and ebx, NOT FL_FLIP_HALF ; Make sure the next run doesn't have
1501: mov fl, ebx ; to do a half-flip if it doesn't
1502: ; want to
1503: jmp continue_complex_clipping
1504:
1505: ;-----------------------------------------------------------------------;
1506: ; Switch banks for a y-major line. ;
1507: ;-----------------------------------------------------------------------;
1508:
1509: public bank_y_major
1510: bank_y_major:
1511: mov d_R, edx ; save the DDA registers in stack variables
1512: mov i, esi
1513: mov r, edi
1514: mov dN, ecx
1515: sub ebx, esi ; Undo our offset
1516:
1517: bank_y_output_strips:
1518: lea edx, [strip]
1519: mov ecx, pls
1520:
1521: ptrCall <dword ptr pfn>, \
1522: <edx, ecx, eax>
1523:
1524: mov esi, pdsurf
1525: test fl, FL_FLIP_V
1526: jz short bank_y_down
1527:
1528: bank_y_up:
1529: mov edi, strip.ST_pjScreen
1530: sub edi, [esi].dsurf_pvBitmapStart ; edi = offset relative to the old pvBitmapStart
1531: mov ecx, [esi].dsurf_rcl1WindowClip.yTop
1532: push ecx ; keep the old yTop on the stack across the call
1533: dec ecx ; we want yTop - 1 to be mapped in
1534:
1535: ; Map in the next higher bank:
1536:
1537: ptrCall <dword ptr [esi].dsurf_pfnBankControl>, \
1538: <esi, ecx, JustifyBottom>; ebx, esi and edi are preserved
1539:
1540: pop ecx ; ecx = old yTop
1541: sub ecx, [esi].dsurf_rcl1WindowClip.yTop
1542: ; ecx = # of scans can do in bank
1543:
1544: add edi, [esi].dsurf_pvBitmapStart ; rebase the offset on the value read after the call
1545: mov strip.ST_pjScreen, edi
1546:
1547: mov edx, cPelsAfterThisBank ; edx = cPelsAfterBank
1548: lea eax, [strip.ST_alStrips] ; eax = plStrip
1549: or ebx, ebx ; ebx = cPels
1550: jge bank_y_done_partial_strip ; not negative: no strip is left unfinished
1551: jmp short bank_y_done_switch
1552:
1553: bank_y_down:
1554: mov edi, strip.ST_pjScreen
1555: sub edi, [esi].dsurf_pvBitmapStart ; edi = offset relative to the old pvBitmapStart
1556: mov ecx, [esi].dsurf_rcl1WindowClip.yBottom
1557: push ecx ; keep the old yBottom on the stack across the call
1558:
1559: ; Map in the next lower bank:
1560:
1561: ptrCall <dword ptr [esi].dsurf_pfnBankControl>, \
1562: <esi, ecx, JustifyTop> ; ebx, esi and edi are preserved
1563:
1564: pop eax ; eax = old yBottom
1565: mov ecx, [esi].dsurf_rcl1WindowClip.yBottom
1566: sub ecx, eax ; ecx = # scans can do in bank
1567:
1568: add edi, [esi].dsurf_pvBitmapStart ; rebase the offset on the value read after the call
1569: mov strip.ST_pjScreen, edi
1570:
1571: mov edx, cPelsAfterThisBank ; edx = cPelsAfterBank
1572: lea eax, [strip.ST_alStrips] ; eax = plStrip
1573: or ebx, ebx ; ebx = cPels
1574: jge short bank_y_done_partial_strip ; not negative: no strip is left unfinished
1575:
1576: bank_y_done_switch:
1577:
1578: ; Handle a single strip stretching over multiple banks:
1579:
1580: test fl, FL_FLIP_HALF
1581: jz short bank_y_no_half_flip
1582:
1583: ; We now have to adjust for the fact that the strip drawers always leave
1584: ; the state ready for the next new strip (e.g., if we're doing vertical
1585: ; strips, it advances pjScreen one to the right after drawing each strip).
1586: ; But the problem is that since we crossed a bank, we have to continue the
1587: ; *old* strip, so we have to undo that advance:
1588:
1589: bank_y_half_flip:
1590: ror strip.ST_jStyleMask, 1
1591: ror strip.ST_jBitMask, 1 ; CF = 1 when the mask bit wrapped around
1592: adc strip.ST_pjScreen, 0 ; ...in which case move pjScreen forward one byte
1593: jmp short bank_y_done_bit_adjust
1594:
1595: bank_y_no_half_flip:
1596: rol strip.ST_jStyleMask, 1
1597: rol strip.ST_jBitMask, 1 ; CF = 1 when the mask bit wrapped around
1598: sbb strip.ST_pjScreen, 0 ; ...in which case move pjScreen back one byte
1599:
1600: bank_y_done_bit_adjust:
1601: mov esi, ebx
1602: neg esi ; esi = # pels left in strip
1603:
1604: ; eax = pointer to first strip entry
1605: ; ebx = negative esi
1606: ; ecx = # of pels we can put down in this window
1607: ; edx = # of pels remaining to do in line
1608: ; esi = # of pels left in strip
1609:
1610: ; We have three special cases to check here:
1611: ;
1612: ; 1) If the strip spans the entire next window
1613: ; 2) This is the last strip in the line
1614: ; 3) Neither of the above
1615:
1616: cmp edx,ecx ;if line shorter than bank,
1617: jle short bank_y_check_if_last_strip; know strip doesn't span bank
1618:
1619: cmp esi,ecx ;if line spans bank, don't have
1620: jl short bank_y_continue_strip ; to check if last strip
1621:
1622: ; If ((# of pels in line > window size) && (# of pels in strip >= window size))
1623: ; then the strip spans this bank:
1624:
1625: mov [eax], ecx ; emit one strip covering the whole window
1626: add eax, 4
1627: add ebx, ecx ; fewer pels of the strip are still owed
1628: sub edx, ecx ; and fewer pels remain in the line
1629: mov cPelsAfterThisBank, edx
1630: jmp bank_y_output_strips ; draw it and switch banks again
1631:
1632: bank_y_check_if_last_strip:
1633: cmp esi, edx ;if strip is shorter than line,
1634: jl short bank_y_continue_strip ; we know this isn't the last
1635: ; strip
1636:
1637: ; Handle case where this is the last strip in the line and it overlaps a bank:
1638:
1639: mov [eax], edx ; last strip = all pels remaining in the line
1640: add eax, 4
1641: jmp very_final_strip
1642:
1643: bank_y_continue_strip:
1644: mov [eax], esi ; emit the rest of the interrupted strip
1645: add eax, 4
1646:
1647: bank_y_done_partial_strip:
1648: add ebx, edx ; cPels += cPelsAfterThisBank
1649: sub edx, ecx ; cPelsAfterThisBank -= cyWindow
1650:
1651: jle short bank_y_get_ready ; the rest of the line fits in this window
1652: sub ebx, edx ; otherwise limit cPels to what fits in this window
1653:
1654: bank_y_get_ready:
1655: mov cPelsAfterThisBank, edx
1656: mov edi, r ; reload the DDA registers saved at bank_y_major
1657: mov edx, d_R
1658: mov ecx, dN
1659: jmp done_output_strips ; esi is reloaded from d_I there
1660:
1661: ;---------------------------Private-Routine-----------------------------;
1662: ; do_some_styling
1663: ;
1664: ; Inputs:
1665: ; eax = ptlStart.ptl_y
1666: ; ebx = fl
1667: ; ecx = ptlStart.ptl_x
1668: ; Preserves:
1669: ; eax, ebx, ecx
1670: ; Output:
1671: ; Exits to done_styling.
1672: ;
1673: ;-----------------------------------------------------------------------;
1674:
1675: public do_some_styling
1676: do_some_styling:
1677: mov esi, pls
1678: mov ptlStart.ptl_x, ecx ; save ptlStart.ptl_x; ecx is used as scratch below
1679:
1680: mov edi, [esi].LS_spNext ; spThis
1681: mov edx, edi
1682: add edx, cStylePels ; spNext
1683:
1684: testb ebx, FL_ALTERNATESTYLED
1685: jz short do_non_alternate_style
1686:
1687: ; Do alternate styles:
1688:
1689: and edx, 1 ; only the low bit of the style position is kept
1690: mov [esi].LS_spNext, edx
1691: testb ebx, FL_FLIP_H
1692: jz short alternate_left_to_right
1693:
1694: add ecx, edx
1695: sub ecx, x0
1696: add ecx, xStart ; ptlStart.x + spNext - x0 + xStart + 1
1697: inc ecx
1698: jmp short compute_alternate_mask
1699:
1700: alternate_left_to_right:
1701: add ecx, edi
1702: add ecx, x0
1703: sub ecx, xStart ; ptlStart.x + spThis + x0 - xStart
1704:
1705: compute_alternate_mask:
1706: mov strip.ST_jStyleMask, 55h ; 01010101b
1707: ror strip.ST_jStyleMask, cl ; an odd rotate count gives 0AAh, an even one leaves 55h
1708:
1709: mov strip.ST_spRemaining, 1
1710: mov strip.ST_xyDensity, 1
1711: mov ecx, ptlStart.ptl_x ; restore ecx (eax and ebx were never modified)
1712: jmp done_styling
1713:
1714: do_non_alternate_style:
1715:
1716: ; For styles, we don't bother to keep the style position normalized.
1717: ; (we do ensure that it's positive, though). If a figure is over 2
1718: ; billion pels long, we'll be a pel off in our style state (oops!).
1719:
1720: and edx, 7fffffffh ; clear the sign bit of spNext
1721: mov [esi].LS_spNext, edx
1722: mov ptlStart.ptl_y, eax ; save ptlStart.ptl_y; eax is used as scratch below
1723: testb ebx, FL_MASKSTYLED
1724: jz short do_arbitrary_style
1725:
1726: ; Do mask styles:
1727:
1728: mov eax, [esi].LS_xyDensity ; Gotta copy to strips struct
1729: mov strip.ST_xyDensity, eax
1730:
1731: testb ebx, FL_FLIP_H
1732: jz short mask_left_to_right
1733:
1734: sub edx, x0
1735: add edx, xStart
1736: add edx, 2 ; edx = spNext - x0 + xStart + 2
1737: mov eax, edx
1738: xor edx, edx ; zero the high half of the dividend
1739:
1740: mov edi, STYLE_DENSITY
1741: div edi ; eax = quotient, edx = remainder
1742: add ecx, eax ; rotate count = ptlStart.x + quotient
1743: inc edx ; spRemaining = remainder + 1
1744: mov eax, [esi].LS_ulStyleMaskRtoL
1745: jmp short compute_masked_mask
1746:
1747: mask_left_to_right:
1748: add edi, x0
1749: sub edi, xStart ; edi = spThis + x0 - xStart
1750: mov eax, edi
1751: xor edx, edx ; zero the high half of the dividend
1752: mov edi, STYLE_DENSITY
1753: div edi ; eax = quotient, edx = remainder
1754: sub ecx, eax ; rotate count = ptlStart.x - quotient
1755: neg edx
1756: add edx, STYLE_DENSITY ; spRemaining = STYLE_DENSITY - remainder
1757: mov eax, [esi].LS_ulStyleMaskLtoR
1758:
1759: compute_masked_mask:
1760: mov strip.ST_spRemaining, edx
1761: ror al, cl ; rotate the low byte of the style mask by the count in cl
1762: mov strip.ST_jStyleMask, al
1763: mov eax, ptlStart.ptl_y ; restore eax and ecx for done_styling
1764: mov ecx, ptlStart.ptl_x
1765: jmp done_styling
1766:
1767: ; Do arbitrary styles:
1768:
1769: do_arbitrary_style:
1770: testb ebx, FL_FLIP_H
1771: jz short arbitrary_left_to_right
1772:
1773: sub edx, x0
1774: add edx, xStart ; edx = spNext - x0 + xStart
1775: mov eax, edx
1776: xor edx, edx ; zero the high half of the dividend
1777: div [esi].LS_spTotal ; eax = sp / spTotal, edx = sp % spTotal
1778:
1779: neg edx ; flags: result is 0 only when the remainder was 0
1780: jge short continue_right_to_left ; remainder was 0: leave edx and eax as they are
1781: add edx, [esi].LS_spTotal ; edx = spTotal - remainder
1782: not eax ; complement the quotient (flips its low bit, tested below)
1783:
1784: continue_right_to_left:
1785: mov edi, dword ptr [esi].LS_jStartMask
1786: not edi ; right-to-left starts from the inverted start mask
1787: mov ecx, [esi].LS_aspRtoL
1788: jmp short compute_arbitrary_stuff
1789:
1790: arbitrary_left_to_right:
1791: add edi, x0
1792: sub edi, xStart ; edi = spThis + x0 - xStart
1793: mov eax, edi
1794: xor edx, edx ; zero the high half of the dividend
1795: div [esi].LS_spTotal ; eax = sp / spTotal, edx = sp % spTotal
1796: mov edi, dword ptr [esi].LS_jStartMask
1797: mov ecx, [esi].LS_aspLtoR
1798:
1799: compute_arbitrary_stuff:
1800: ; eax = sp / spTotal
1801: ; ebx = fl
1802: ; ecx = pspStart
1803: ; edx = sp % spTotal
1804: ; esi = pls
1805: ; edi = jStyleMask
1806:
1807: and eax, [esi].LS_cStyle ; if odd length style and second run
1808: and al, 1 ; through style array, flip the
1809: jz short odd_style_array_done ; meaning of the elements
1810: not edi
1811:
1812: odd_style_array_done:
1813: mov eax, [esi].LS_cStyle
1814: mov strip.ST_pspStart, ecx
1815: lea eax, [ecx + eax * 4 - 4] ; address of the last dword entry of the style array
1816: mov strip.ST_pspEnd, eax
1817:
1818: find_psp:
1819: sub edx, [ecx] ; walk the array, subtracting each entry from the position
1820: jl short found_psp ; went negative: the position lies inside this entry
1821: add ecx, 4
1822: jmp short find_psp ; NOTE(review): no check against pspEnd; presumably safe because edx < spTotal - confirm
1823:
1824: found_psp:
1825: mov strip.ST_psp, ecx
1826: neg edx
1827: mov strip.ST_spRemaining, edx ; amount left in the entry that was found
1828:
1829: sub ecx, strip.ST_pspStart ; byte offset of that entry from the array start
1830: test ecx, 4 ; size STYLEPOS
1831: jz short done_arbitrary ; even entry index: mask stays as is
1832: not edi ; odd entry index: invert the mask
1833:
1834: done_arbitrary:
1835: mov dword ptr strip.ST_jStyleMask, edi
1836: mov eax, ptlStart.ptl_y ; restore eax and ecx for done_styling
1837: mov ecx, ptlStart.ptl_x
1838: jmp done_styling
1839:
1840: ;---------------------------Private-Routine-----------------------------;
1841: ; do_some_clipping
1842: ;
1843: ; Inputs:
1844: ; eax = garbage
1845: ; ebx = fl
1846: ; ecx = x0
1847: ; edx = garbage
1848: ; esi = x1
1849: ; edi = garbage
1850: ;
1851: ; Decides whether to do simple or complex clipping.
1852: ;
1853: ;-----------------------------------------------------------------------;
1854:
1855: align 4
1856:
1857: public do_some_clipping
1858: do_some_clipping:
1859: testb ebx, FL_COMPLEX_CLIP
1860: jnz initialize_complex_clipping ; otherwise fall through into simple_clipping
1861:
1862: ;-----------------------------------------------------------------------;
1863: ; simple_clipping
1864: ;
1865: ; Inputs:
1866: ; ebx = fl
1867: ; ecx = x0
1868: ; esi = x1
1869: ; Output:
1870: ; ebx = fl
1871: ; ecx = new x0 (stack variable updated too)
1872: ; esi = new x1
1873: ; y0 stack variable updated
1874: ; Uses:
1875: ; All registers
1876: ; Exits:
1877: ; to done_clipping
1878: ;
1879: ; This routine handles clipping the line to the clip rectangle (it's
1880: ; faster to handle this case in the driver than to call the engine to
1881: ; clip for us).
1882: ;
1883: ; Fractional end-point lines complicate our lives a bit when doing
1884: ; clipping:
1885: ;
1886: ; 1) For styling, we must know the unclipped line's length in pels, so
1887: ; that we can correctly update the styling state when the line is
1888: ; clipped. For this reason, I do clipping after doing the hard work
1889: ; of figuring out which pixels are at the ends of the line (this is
1890: ; wasted work if the line is not styled and is completely clipped,
1891: ; but I think it's simpler this way). Another reason is that we'll
1892: ; have calculated eqGamma already, which we use for the intercept
1893: ; calculations.
1894: ;
1895: ; With the assumption that most lines will not be completely clipped
1896: ; away, this strategy isn't too painful.
1897: ;
1898: ; 2) x0, y0 are not necessarily zero, where (x0, y0) is the start pel of
1899: ; the line.
1900: ;
1901: ; 3) We know x0, y0 and x1, but not y1. We haven't needed to calculate
1902: ; y1 until now. We'll need the actual value, and not an upper bound
1903: ; like y1 = LFLOOR(dM) + 2 because we have to be careful when
1904: ; calculating x(y) that y0 <= y <= y1, otherwise we can cause an
1905: ; overflow on the divide (which, needless to say, is bad).
1906: ;
1907: ;-----------------------------------------------------------------------;
1908:
1909: public simple_clipping
1910: simple_clipping:
1911: mov edi, prclClip ; get pointer to normalized clip rect
1912: and ebx, FL_RECTLCLIP_MASK ; (it's lower-right exclusive)
1913:
1914: .errnz (FL_RECTLCLIP_SHIFT - 2); ((ebx AND FL_RECTLCLIP_MASK) shr
1915: .errnz (size RECTL) - 16 ; FL_RECTLCLIP_SHIFT) is our index
1916: lea edi, [edi + ebx*4] ; into the array of rectangles
1917:
1918: mov edx, [edi].xRight ; load the rect coordinates
1919: mov eax, [edi].xLeft
1920: mov ebx, [edi].yBottom ; ebx no longer holds fl; reloaded at top_intercept
1921: mov edi, [edi].yTop
1922:
1923: ; Translate to our origin and do some quick completely clipped tests:
1924:
1925: sub edx, x
1926: cmp ecx, edx
1927: jge totally_clipped ; totally clipped if x0 >= xRight
1928:
1929: sub eax, x
1930: cmp esi, eax
1931: jl totally_clipped ; totally clipped if x1 < xLeft
1932:
1933: sub ebx, y
1934: cmp y0, ebx
1935: jge totally_clipped ; totally clipped if y0 >= yBottom
1936:
1937: sub edi, y
1938:
1939: ; Save some state:
1940:
1941: mov xClipRight, edx
1942: mov xClipLeft, eax
1943:
1944: cmp esi, edx ; if (x1 >= xRight) x1 = xRight - 1
1945: jl short calculate_y1
1946: lea esi, [edx - 1]
1947:
1948: calculate_y1:
1949: mov eax, esi ; y1 = (x1 * dN + eqGamma) / dM
1950: mul dN ; edx:eax = x1 * dN
1951: add eax, eqGamma_lo
1952: adc edx, eqGamma_hi
1953: div dM ; eax = y1
1954:
1955: cmp edi, eax ; if (yTop > y1) clipped
1956: jg short totally_clipped
1957:
1958: cmp ebx, eax ; if (yBottom > y1) know x1
1959: jg short x1_computed
1960:
1961: mov eax, ebx ; x1 = (yBottom * dM + eqBeta) / dN
1962: mul dM ; edx:eax = yBottom * dM
1963: stc ; carry in, so the sbb pair subtracts eqGamma + 1
1964: sbb eax, eqGamma_lo
1965: sbb edx, eqGamma_hi
1966: div dN
1967: mov esi, eax ; esi = new x1
1968:
1969: ; At this point, we've taken care of calculating the intercepts with the
1970: ; right and bottom edges. Now we work on the left and top edges:
1971:
1972: x1_computed:
1973: mov edx, y0
1974:
1975: mov eax, xClipLeft ; don't have to compute y intercept
1976: cmp eax, ecx ; at left edge if line starts to
1977: jle short top_intercept ; right of left edge
1978:
1979: mov ecx, eax ; x0 = xLeft
1980: mul dN ; y0 = (xLeft * dN + eqGamma) / dM
1981: add eax, eqGamma_lo
1982: adc edx, eqGamma_hi
1983: div dM
1984:
1985: cmp ebx, eax ; if (yBottom <= y0) clipped
1986: jle short totally_clipped
1987:
1988: mov edx, eax ; edx = new y0 for the yTop test below
1989: mov y0, eax
1990:
1991: top_intercept:
1992: mov ebx, fl ; get ready to leave
1993: mov x0, ecx
1994:
1995: cmp edi, edx ; if (yTop <= y0) done clipping
1996: jle done_clipping
1997:
1998: mov eax, edi ; x0 = (yTop * dM + eqBeta) / dN + 1
1999: mul dM ; edx:eax = yTop * dM
2000: stc ; carry in, so the sbb pair subtracts eqGamma + 1
2001: sbb eax, eqGamma_lo
2002: sbb edx, eqGamma_hi
2003: div dN
2004: lea ecx, [eax + 1]
2005:
2006: cmp xClipRight, ecx ; if (xRight <= x0) clipped
2007: jle short totally_clipped
2008:
2009: mov y0, edi ; y0 = yTop
2010: mov x0, ecx
2011: jmp done_clipping ; all done!
2012:
2013: totally_clipped:
2014:
2015: ; The line is completely clipped. See if we have to update our style state:
2016:
2017: mov ebx, fl
2018: testb ebx, FL_STYLED
2019: jz next_line ; not styled: no style state to update
2020:
2021: ; Adjust our style state:
2022:
2023: mov esi, pls
2024: mov eax, [esi].LS_spNext
2025: add eax, cStylePels ; advance by the pel count saved before clipping
2026: mov [esi].LS_spNext, eax
2027:
2028: cmp eax, [esi].LS_spTotal2
2029: jb next_line ; still below spTotal2: nothing more to do
2030:
2031: ; Have to normalize first:
2032:
2033: xor edx, edx ; zero the high half of the dividend
2034: div [esi].LS_spTotal2
2035: mov [esi].LS_spNext, edx ; spNext = spNext mod spTotal2
2036:
2037: jmp next_line
2038:
2039: ;-----------------------------------------------------------------------;
2040:
2041: initialize_complex_clipping:
2042: mov eax, dN ; save a copy of original dN
2043: mov dN_Original, eax ; continue_complex_clipping reloads dN from it for each run
2044:
2045: ;---------------------------Private-Routine-----------------------------;
2046: ; continue_complex_clipping
2047: ;
2048: ; Inputs:
2049: ; ebx = fl
2050: ; Output:
2051: ; ebx = fl
2052: ; ecx = x0
2053: ; esi = x1
2054: ; Uses:
2055: ; All registers.
2056: ; Exits:
2057: ; to done_clipping
2058: ;
2059: ; This routine handles the necessary initialization for the next
2060: ; run in the CLIPLINE structure.
2061: ;
2062: ; NOTE: This routine is jumped to from two places!
2063: ;-----------------------------------------------------------------------;
2064:
2065: public continue_complex_clipping
2066: continue_complex_clipping:
2067: mov edi, prun
2068: mov ecx, xStart
2069: testb ebx, FL_FLIP_H
2070: jz short complex_left_to_right
2071:
2072: complex_right_to_left:
2073:
2074: ; Figure out x0 and x1 for right-to-left lines:
2075:
2076: add ecx, cStylePels
2077: dec ecx
2078: mov esi, ecx ; esi = ecx = xStart + cStylePels - 1
2079: sub ecx, [edi].RUN_iStop ; New x0
2080: sub esi, [edi].RUN_iStart ; New x1
2081: jmp short complex_reset_variables
2082:
2083: complex_left_to_right:
2084:
2085: ; Figure out x0 and x1 for left-to-right lines:
2086:
2087: mov esi, ecx ; esi = ecx = xStart
2088: add ecx, [edi].RUN_iStart ; New x0
2089: add esi, [edi].RUN_iStop ; New x1
2090:
2091: complex_reset_variables:
2092: mov x0, ecx
2093:
2094: ; The half flip mucks with some of our variables, and we have to reset
2095: ; them every pass. We would have to reset eqGamma too, but it never
2096: ; got saved to memory in its modified form.
2097:
2098: add edi, size RUN
2099: mov prun, edi ; Increment run pointer for next time
2100:
2101: mov edi, pls
2102: mov eax, [edi].LS_spComplex
2103: mov [edi].LS_spNext, eax ; pls->spNext = pls->spComplex
2104:
2105: mov eax, dN_Original ; dN = dN_Original
2106: mov dN, eax
2107:
2108: mul ecx ; edx:eax = dN * x0
2109: add eax, eqGamma_lo
2110: adc edx, eqGamma_hi ; [edx:eax] = dN*x0 + eqGamma
2111:
2112: div dM ; eax = (dN*x0 + eqGamma) / dM
2113: mov y0, eax
2114: jmp done_clipping
2115:
2116: endProc bLines
2117:
2118: _TEXT$03 ends
2119:
2120: end
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.