|
|
1.1 ! root 1: ;---------------------------Module-Header------------------------------; ! 2: ; Module Name: lines.asm ! 3: ; ! 4: ; Draws a set of connected polylines. ! 5: ; ! 6: ; The actual pixel-lighting code is different depending on if the lines ! 7: ; are styled/unstyled and we're doing an arbitrary ROP or set-style ROP. ! 8: ; ! 9: ; Lines are drawn from left to right. So if a line moves from right ! 10: ; to left, the endpoints are swapped and the line is drawn from left to ! 11: ; right. ! 12: ; ! 13: ; See s3\lines.cxx for a portable version (sans simple clipping). ! 14: ; ! 15: ; Copyright (c) 1992 Microsoft Corporation ! 16: ;-----------------------------------------------------------------------; ! 17: ! 18: .386 ! 19: ! 20: .model small,c ! 21: ! 22: assume cs:FLAT,ds:FLAT,es:FLAT,ss:FLAT ! 23: assume fs:nothing,gs:nothing ! 24: ! 25: .xlist ! 26: include stdcall.inc ;calling convention cmacros ! 27: include i386\egavga.inc ! 28: include i386\strucs.inc ! 29: include i386\driver.inc ! 30: include i386\lines.inc ! 31: .list ! 32: ! 33: .data ! 34: ! 35: public gaflRoundTable ! 36: gaflRoundTable label dword ! 37: dd FL_H_ROUND_DOWN + FL_V_ROUND_DOWN ; no flips ! 38: dd FL_H_ROUND_DOWN + FL_V_ROUND_DOWN ; D flip ! 39: dd FL_H_ROUND_DOWN ; V flip ! 40: dd FL_V_ROUND_DOWN ; D & V flip ! 41: dd FL_V_ROUND_DOWN ; slope one ! 42: dd 0baadf00dh ; (slope one & D flip -- presumably never indexed) ! 43: dd FL_H_ROUND_DOWN ; slope one & V flip ! 44: dd 0baadf00dh ; (slope one, D & V flip -- presumably never indexed) ! 45: ! 46: .code ! 47: ! 48: ;--------------------------------Macro----------------------------------; ! 49: ; TESTB ebx, <mask> ! 50: ; ! 51: ; Emits a byte-sized TEST of bl or bh when the mask lies entirely in the ! 52: ; lo-byte or hi-byte of the low word (saving 3 bytes of code space); ! 53: ; any other mask gets a full-width TEST. Byte forms exist only for EBX. ! 54: ;-----------------------------------------------------------------------; ! 55: ! 56: TESTB macro targ,mask,thirdarg ! 57: local mask2,delta ! 58: ! 59: ifnb <thirdarg> ! 60: .err TESTB mask must be enclosed in brackets! ! 61: endif ! 62: ! 63: delta = 0 ; 0 => test bl, 1 => test bh ! 64: mask2 = mask ; working copy of the mask ! 
65: ! 66: if mask2 AND 0ffff0000h ! 67: test targ,mask ; If bit set in hi-word, ! 68: exitm ; test entire dword ! 69: endif ! 70: ! 71: if mask2 AND 0ff00h ! 72: if mask2 AND 0ffh ; If bit set in lo-byte and ! 73: test targ,mask ; hi-byte, test entire dword ! 74: exitm ! 75: endif ! 76: ! 77: mask2 = mask2 SHR 8 ; Mask is only in bits 8-15: ! 78: delta = 1 ; shift it down and use bh ! 79: endif ! 80: ! 81: ifidni <targ>,<EBX> ! 82: if delta ! 83: test bh,mask2 ! 84: else ! 85: test bl,mask2 ! 86: endif ! 87: exitm ! 88: endif ! 89: ! 90: .err Too bad TESTB doesn't support targets other than ebx! ! 91: endm ! 92: ! 93: ;---------------------------Public-Routine------------------------------; ! 94: ; BOOL bLines(ppdev, pptfxFirst, pptfxBuf, prun, cptfx, pls, ! 95: ; prclClip, apfn[], flStart) ! 96: ; ! 97: ; Do all the DDA calculations for lines. ! 98: ; ! 99: ; Doing Lines Right ! 100: ; ----------------- ! 101: ; ! 102: ; In NT, all lines are given to the device driver in fractional ! 103: ; coordinates, in a 28.4 fixed point format. The lower 4 bits are ! 104: ; fractional for sub-pixel positioning. ! 105: ; ! 106: ; Note that you CANNOT! just round the coordinates to integers ! 107: ; and pass the results to your favorite integer Bresenham routine!! ! 108: ; (Unless, of course, you have such a high resolution device that ! 109: ; nobody will notice -- not likely for a display device.) The ! 110: ; fractions give a more accurate rendering of the line -- this is ! 111: ; important for things like our Bezier curves, which would have 'kinks' ! 112: ; if the points in its polyline approximation were rounded to integers. ! 113: ; ! 114: ; Unfortunately, for fractional lines there is more setup work to do ! 115: ; a DDA than for integer lines. However, the main loop is exactly ! 116: ; the same (and can be done entirely with 32 bit math). ! 117: ; ! 118: ; If You've Got Hardware That Does Bresenham ! 119: ; ------------------------------------------ ! 120: ; ! 121: ; A lot of hardware limits DDA error terms to 'n' bits. 
With fractional ! 122: ; coordinates, 4 bits are given to the fractional part, letting ! 123: ; you draw in hardware only those lines that lie entirely in a 2^(n-4) ! 124: ; by 2^(n-4) pixel space. ! 125: ; ! 126: ; And you still have to correctly draw those lines with coordinates ! 127: ; outside that space! Remember that the screen is only a viewport ! 128: ; onto a 28.4 by 28.4 space -- if any part of the line is visible ! 129: ; you MUST render it precisely, regardless of where the end points lie. ! 130: ; So even if you do it in software, somewhere you'll have to have a ! 131: ; 32 bit DDA routine. ! 132: ; ! 133: ; Our Implementation ! 134: ; ------------------ ! 135: ; ! 136: ; We employ a run length slice algorithm: our DDA calculates the ! 137: ; number of pixels that are in each row (or 'strip') of pixels. ! 138: ; ! 139: ; We've separated the running of the DDA and the drawing of pixels: ! 140: ; we run the DDA for several iterations and store the results in ! 141: ; a 'strip' buffer (which are the lengths of consecutive pixel rows of ! 142: ; the line), then we crank up a 'strip drawer' that will draw all the ! 143: ; strips in the buffer. ! 144: ; ! 145: ; We also employ a 'half-flip' to reduce the number of strip ! 146: ; iterations we need to do in the DDA and strip drawing loops: when a ! 147: ; (normalized) line's slope is more than 1/2, we do a final flip ! 148: ; about the line y = (1/2)x. So now, instead of each strip being ! 149: ; consecutive horizontal or vertical pixel rows, each strip is composed ! 150: ; of those pixels aligned in 45 degree rows. So a line like (0, 0) to ! 151: ; (128, 128) would generate only one strip. ! 152: ; ! 153: ; We also always draw only left-to-right. ! 154: ; ! 155: ; Style lines may have arbitrary style patterns. We specially ! 156: ; optimize the default patterns (and call them 'masked' styles). ! 157: ; ! 158: ; The DDA Derivation ! 159: ; ------------------ ! 160: ; ! 
161: ; Here is how I like to think of the DDA calculation. ! 162: ; ! 163: ; We employ Knuth's "diamond rule": rendering a one-pixel-wide line ! 164: ; can be thought of as dragging a one-pixel-wide by one-pixel-high ! 165: ; diamond along the true line. Pixel centers lie on the integer ! 166: ; coordinates, and so we light any pixel whose center gets covered ! 167: ; by the "drag" region (John D. Hobby, Journal of the Association ! 168: ; for Computing Machinery, Vol. 36, No. 2, April 1989, pp. 209-229). ! 169: ; ! 170: ; We must define which pixel gets lit when the true line falls ! 171: ; exactly half-way between two pixels. In this case, we follow ! 172: ; the rule: when two pels are equidistant, the upper or left pel ! 173: ; is illuminated, unless the slope is exactly one, in which case ! 174: ; the upper or right pel is illuminated. (So we make the edges ! 175: ; of the diamond exclusive, except for the top and left vertices, ! 176: ; which are inclusive, unless we have slope one.) ! 177: ; ! 178: ; This metric decides what pixels should be on any line BEFORE it is ! 179: ; flipped around for our calculation. Having a consistent metric ! 180: ; this way will let our lines blend nicely with our curves. The ! 181: ; metric also dictates that we will never have one pixel turned on ! 182: ; directly above another that's turned on. We will also never have ! 183: ; a gap; i.e., there will be exactly one pixel turned on for each ! 184: ; column between the start and end points. All that remains to be ! 185: ; done is to decide how many pixels should be turned on for each row. ! 186: ; ! 187: ; So lines we draw will consist of varying numbers of pixels on ! 188: ; successive rows, for example: ! 189: ; ! 190: ; ****** ! 191: ; ***** ! 192: ; ****** ! 193: ; ***** ! 194: ; ! 195: ; We'll call each set of pixels on a row a "strip". ! 196: ; ! 197: ; (Please remember that our coordinate space has the origin as the ! 
198: ; upper left pixel on the screen; positive y is down and positive x ! 199: ; is right.) ! 200: ; ! 201: ; Device coordinates are specified as fixed point 28.4 numbers, ! 202: ; where the first 28 bits are the integer coordinate, and the last ! 203: ; 4 bits are the fraction. So coordinates may be thought of as ! 204: ; having the form (x, y) = (M/F, N/F) where F is the constant scaling ! 205: ; factor F = 2^4 = 16, and M and N are 32 bit integers. ! 206: ; ! 207: ; Consider the line from (M0/F, N0/F) to (M1/F, N1/F) which runs ! 208: ; left-to-right and whose slope is in the first octant, and let ! 209: ; dM = M1 - M0 and dN = N1 - N0. Then dM >= 0, dN >= 0 and dM >= dN. ! 210: ; ! 211: ; Since the slope of the line is less than 1, the edges of the ! 212: ; drag region are created by the top and bottom vertices of the ! 213: ; diamond. At any given pixel row y of the line, we light those ! 214: ; pixels whose centers are between the left and right edges. ! 215: ; ! 216: ; Let mL(n) denote the line representing the left edge of the drag ! 217: ; region. On pixel row j, the column of the first pixel to be ! 218: ; lit is ! 219: ; ! 220: ; iL(j) = ceiling( mL(j * F) / F) ! 221: ; ! 222: ; Since the line's slope is less than one: ! 223: ; ! 224: ; iL(j) = ceiling( mL([j + 1/2] F) / F ) ! 225: ; ! 226: ; Recall the formula for our line: ! 227: ; ! 228: ; n(m) = (dN / dM) (m - M0) + N0 ! 229: ; ! 230: ; m(n) = (dM / dN) (n - N0) + M0 ! 231: ; ! 232: ; Since the line's slope is less than one, the line representing ! 233: ; the left edge of the drag region is the original line offset ! 234: ; by 1/2 pixel in the y direction: ! 235: ; ! 236: ; mL(n) = (dM / dN) (n - F/2 - N0) + M0 ! 237: ; ! 238: ; From this we can figure out the column of the first pixel that ! 239: ; will be lit on row j, being careful of rounding (if the left ! 240: ; edge lands exactly on an integer point, the pixel at that ! 241: ; point is not lit because of our rounding convention): ! 242: ; ! 
243: ; iL(j) = floor( mL(j F) / F ) + 1 ! 244: ; ! 245: ; = floor( ((dM / dN) (j F - F/2 - N0) + M0) / F ) + 1 ! 246: ; ! 247: ; = floor( (F dM j - F/2 dM - N0 dM + dN M0) / (F dN) ) + 1 ! 248: ; ! 249: ; F dM j - [ dM (N0 + F/2) - dN M0 ] ! 250: ; = floor( ---------------------------------- ) + 1 ! 251: ; F dN ! 252: ; ! 253: ; dM j - [ dM (N0 + F/2) - dN M0 ] / F ! 254: ; = floor( ------------------------------------ ) + 1 (1) ! 255: ; dN ! 256: ; ! 257: ; = floor( (dM j + alpha) / dN ) + 1 ! 258: ; ! 259: ; where ! 260: ; ! 261: ; alpha = - [ dM (N0 + F/2) - dN M0 ] / F ! 262: ; ! 263: ; We use equation (1) to calculate the DDA: there are iL(j+1) - iL(j) ! 264: ; pixels in row j. Because we are always calculating iL(j) for ! 265: ; integer quantities of j, we note that the only fractional term ! 266: ; is constant, and so we can 'throw away' the fractional bits of ! 267: ; alpha: ! 268: ; ! 269: ; beta = floor( - [ dM (N0 + F/2) - dN M0 ] / F ) (2) ! 270: ; ! 271: ; so ! 272: ; ! 273: ; iL(j) = floor( (dM j + beta) / dN ) + 1 (3) ! 274: ; ! 275: ; for integers j. ! 276: ; ! 277: ; Note if iR(j) is the line's rightmost pixel on row j, that ! 278: ; iR(j) = iL(j + 1) - 1. ! 279: ; ! 280: ; Similarly, rewriting equation (1) as a function of column i, ! 281: ; we can determine, given column i, on which pixel row j is the line ! 282: ; lit: ! 283: ; ! 284: ; dN i + [ dM (N0 + F/2) - dN M0 ] / F ! 285: ; j(i) = ceiling( ------------------------------------ ) - 1 ! 286: ; dM ! 287: ; ! 288: ; Floors are easier to compute, so we can rewrite this: ! 289: ; ! 290: ; dN i + [ dM (N0 + F/2) - dN M0 ] / F + dM - 1/F ! 291: ; j(i) = floor( ----------------------------------------------- ) - 1 ! 292: ; dM ! 293: ; ! 294: ; dN i + [ dM (N0 + F/2) - dN M0 ] / F + dM - 1/F - dM ! 295: ; = floor( ---------------------------------------------------- ) ! 296: ; dM ! 297: ; ! 298: ; dN i + [ dM (N0 + F/2) - dN M0 - 1 ] / F ! 299: ; = floor( ---------------------------------------- ) ! 
300: ; dM ! 301: ; ! 302: ; We can once again wave our hands and throw away the fractional bits ! 303: ; of the remainder term: ! 304: ; ! 305: ; j(i) = floor( (dN i + gamma) / dM ) (4) ! 306: ; ! 307: ; where ! 308: ; ! 309: ; gamma = floor( [ dM (N0 + F/2) - dN M0 - 1 ] / F ) (5) ! 310: ; ! 311: ; We now note that ! 312: ; ! 313: ; beta = -gamma - 1 = ~gamma (6) ! 314: ; ! 315: ; To draw the pixels of the line, we could evaluate (3) on every scan ! 316: ; line to determine where the strip starts. Of course, we don't want ! 317: ; to do that because that would involve a multiply and divide for every ! 318: ; scan. So we do everything incrementally. ! 319: ; ! 320: ; We would like to easily compute c_j, the number of pixels on scan j: ! 321: ; ! 322: ; ! 323: ; c_j = iL(j + 1) - iL(j) ! 324: ; ! 325: ; ! 326: ; = floor((dM (j + 1) + beta) / dN) - floor((dM j + beta) / dN) (7) ! 327: ; ! 328: ; This may be rewritten as ! 329: ; ! 330: ; c_j = floor(i_{j+1} + r_{j+1} / dN) - floor(i_j + r_j / dN) (8) ! 331: ; ! 332: ; ! 333: ; where i_j, i_{j+1} are integers and r_j < dN, r_{j+1} < dN. ! 334: ; ! 335: ; ! 336: ; Rewriting (7) again: ! 337: ; ! 338: ; c_j = floor(i_j + r_j / dN + dM / dN) - floor(i_j + r_j / dN) ! 339: ; ! 340: ; ! 341: ; ! 342: ; = floor((r_j + dM) / dN) - floor(r_j / dN) ! 343: ; ! 344: ; ! 345: ; This may be rewritten as ! 346: ; ! 347: ; c_j = dI + floor((r_j + dR) / dN) - floor(r_j / dN) ! 348: ; ! 349: ; ! 350: ; where dI + dR / dN = dM / dN, dI is an integer and dR < dN. ! 351: ; ! 352: ; r_j is the remainder (or "error") term in the DDA loop: r_j / dN ! 353: ; ! 354: ; is the exact fraction of a pixel at which the strip ends. To go ! 355: ; on to the next scan and compute c_{j+1} we need to know r_{j+1}. ! 356: ; ! 357: ; ! 358: ; So in the main loop of the DDA: ! 359: ; ! 360: ; c_j = dI + floor((r_j + dR) / dN) and r_{j+1} = (r_j + dR) % dN ! 361: ; ! 362: ; ! 363: ; and we know r_j < dN, r_{j+1} < dN, and dR < dN. ! 364: ; ! 365: ; ! 
366: ; We have derived the DDA only for lines in the first octant; to ! 367: ; handle other octants we do the common trick of flipping the line ! 368: ; to the first octant by first making the line left-to-right by ! 369: ; exchanging the end-points, then flipping about the lines y = 0 and ! 370: ; y = x, as necessary. We must record the transformations so we can ! 371: ; undo them later. ! 372: ; ! 373: ; We must also be careful of how the flips affect our rounding. If, ! 374: ; to get the line to the first octant, we flipped about y = 0, we now ! 375: ; have to be careful to round a y value of 1/2 up instead of down as ! 376: ; we would for a line originally in the first octant (recall that ! 377: ; "In the case where two pels are equidistant, the upper or left ! 378: ; pel is illuminated..."). ! 379: ; ! 380: ; To account for this rounding when running the DDA, we shift the line ! 381: ; (or not) in the y direction by the smallest amount possible. That ! 382: ; takes care of rounding for the DDA, but we still have to be careful ! 383: ; about the rounding when determining the first and last pixels to be ! 384: ; lit in the line. ! 385: ; ! 386: ; Determining The First And Last Pixels In The Line ! 387: ; ------------------------------------------------- ! 388: ; ! 389: ; Fractional coordinates also make it harder to determine which pixels ! 390: ; will be the first and last ones in the line. We've already taken ! 391: ; the fractional coordinates into account in calculating the DDA, but ! 392: ; the DDA cannot tell us which are the end pixels because it is quite ! 393: ; happy to calculate pixels on the line from minus infinity to positive ! 394: ; infinity. ! 395: ; ! 396: ; The diamond rule determines the start and end pixels. (Recall that ! 397: ; the sides are exclusive except for the left and top vertices.) ! 398: ; This convention can be thought of in another way: there are diamonds ! 399: ; around the pixels, and wherever the true line crosses a diamond, ! 
400: ; that pel is illuminated. ! 401: ; ! 402: ; Consider a line where we've done the flips to the first octant, and the ! 403: ; floor of the start coordinates is the origin: ! 404: ; ! 405: ; +-----------------------> +x ! 406: ; | ! 407: ; | 0 1 ! 408: ; | 0123456789abcdef ! 409: ; | ! 410: ; | 0 00000000?1111111 ! 411: ; | 1 00000000 1111111 ! 412: ; | 2 0000000 111111 ! 413: ; | 3 000000 11111 ! 414: ; | 4 00000 ** 1111 ! 415: ; | 5 0000 ****1 ! 416: ; | 6 000 1*** ! 417: ; | 7 00 1 **** ! 418: ; | 8 ? *** ! 419: ; | 9 22 3 **** ! 420: ; | a 222 33 *** ! 421: ; | b 2222 333 **** ! 422: ; | c 22222 3333 ** ! 423: ; | d 222222 33333 ! 424: ; | e 2222222 333333 ! 425: ; | f 22222222 3333333 ! 426: ; | ! 427: ; | 2 3 ! 428: ; v ! 429: ; +y ! 430: ; ! 431: ; If the start of the line lands on the diamond around pixel 0 (shown by ! 432: ; the '0' region here), pixel 0 is the first pel in the line. The same ! 433: ; is true for the other pels. ! 434: ; ! 435: ; A little more work has to be done if the line starts in the ! 436: ; 'nether-land' between the diamonds (as illustrated by the '*' line): ! 437: ; the first pel lit is the first diamond crossed by the line (pixel 1 in ! 438: ; our example). This calculation is determined by the DDA or slope of ! 439: ; the line. ! 440: ; ! 441: ; If the line starts exactly half way between two adjacent pixels ! 442: ; (denoted here by the '?' spots), the first pixel is determined by our ! 443: ; round-down convention (and is dependent on the flips done to ! 444: ; normalize the line). ! 445: ; ! 446: ; Last Pel Exclusive ! 447: ; ------------------ ! 448: ; ! 449: ; To eliminate repeatedly lit pels between continuous connected lines, ! 450: ; we employ a last-pel exclusive convention: if the line ends exactly on ! 451: ; the diamond around a pel, that pel is not lit. (This eliminates the ! 452: ; checks we had in the old code to see if we were re-lighting pels.) ! 453: ; ! 454: ; The Half Flip ! 455: ; ------------- ! 456: ; ! 
457: ; To make our run length algorithm more efficient, we employ a "half ! 458: ; flip". If, after normalizing to the first octant, the slope is more ! 459: ; than 1/2, we subtract the y coordinate from the x coordinate. This ! 460: ; has the effect of reflecting the coordinates through the line of slope ! 461: ; 1/2. Note that the diagonal gets mapped into the x-axis after a half ! 462: ; flip. ! 463: ; ! 464: ; How Many Bits Do We Need, Anyway? ! 465: ; --------------------------------- ! 466: ; ! 467: ; Note that if the line is visible on your screen, you must light up ! 468: ; exactly the correct pixels, no matter where in the 28.4 x 28.4 device ! 469: ; space the end points of the line lie (meaning you must handle 32 bit ! 470: ; DDAs, though you can certainly have optimized cases for lesser DDAs). ! 471: ; ! 472: ; We move the origin to (floor(M0 / F), floor(N0 / F)), so when we ! 473: ; calculate gamma from (5), we know that 0 <= M0, N0 < F. And we ! 474: ; are in the first octant, so dM >= dN. Then we know that gamma can ! 475: ; be in the range [(-1/2)dM, (3/2)dM]. The DDI guarantees us that ! 476: ; valid lines will have dM and dN values at most 31 bits (unsigned) ! 477: ; of significance. So gamma requires 33 bits of significance (we store ! 478: ; this as a 64 bit number for convenience). ! 479: ; ! 480: ; When running through the DDA loop, r_j + dR can have a value in the ! 481: ; ! 482: ; range 0 <= r_j + dR < 2 dN; thus the result must be a 32 bit unsigned value. ! 483: ; ! 484: ; ! 485: ; Testing Lines ! 486: ; ------------- ! 487: ; ! 488: ; To be NT compliant, a display driver must exactly adhere to GIQ, ! 489: ; which means that for any given line, the driver must light exactly ! 490: ; the same pels as does GDI. This can be tested using the Guiman tool ! 491: ; provided elsewhere in the DDK, and 'ZTest', which draws random lines ! 492: ; on the screen and to a bitmap, and compares the results. ! 493: ; ! 494: ; If You've Got Line Hardware ! 
495: ; --------------------------- ! 496: ; ! 497: ; If your hardware already adheres to GIQ, you're all set. Otherwise ! 498: ; you'll want to look at the S3 sample code and read the following: ! 499: ; ! 500: ; 1) You'll want to special case integer-only lines, since they require ! 501: ; less processing time and are more common (CAD programs will probably ! 502: ; only ever give integer lines). GDI does not provide a flag saying ! 503: ; that all lines in a path are integer lines; consequently, you will ! 504: ; have to explicitly check every line. ! 505: ; ! 506: ; 2) You are required to correctly draw any line in the 28.4 device ! 507: ; space that intersects the viewport. If you have less than 32 bits ! 508: ; of significance in the hardware for the Bresenham terms, extremely ! 509: ; long lines would overflow the hardware. For such (rare) cases, you ! 510: ; can fall back to strip-drawing code, of which there is a C version in ! 511: ; the S3's lines.cxx (or if your display is a frame buffer, fall back ! 512: ; to the engine). ! 513: ; ! 514: ; 3) If you can explicitly set the Bresenham terms in your hardware, you ! 515: ; can draw non-integer lines using the hardware. If your hardware has ! 516: ; 'n' bits of precision, you can draw GIQ lines that are up to 2^(n-5) ! 517: ; pels long (4 bits are required for the fractional part, and one bit is ! 518: ; used as a sign bit). Note that integer lines don't require the 4 ! 519: ; fractional bits, so if you special case them as in 1), you can do ! 520: ; integer lines that are up to 2^(n - 1) pels long. See the S3's ! 521: ; fastline.asm for an example. ! 522: ; ! 523: ;-----------------------------------------------------------------------; ! 524: ! 525: cProc bLines,36,< \ ! 526: uses esi edi ebx, \ ! 527: ppdev: ptr, \ ! 528: pptfxFirst: ptr, \ ! 529: pptfxBuf: ptr, \ ! 530: prun: ptr, \ ! 531: cptfx: dword, \ ! 532: pls: ptr, \ ! 533: prclClip: ptr, \ ! 534: apfn: ptr, \ ! 535: flStart: dword > ! 536: ! 
537: ; ppdev: Surface data ! 538: ; pptfxFirst: Start point of first line ! 539: ; pptfxBuf: All subsequent points ! 540: ; prun: Array of runs if doing complex clipping ! 541: ; cptfx: Number of points in pptfxBuf (i.e., # lines) ! 542: ; pls: Line state ! 543: ; prclClip: Clip rectangle if doing simple clipping ! 544: ; apfn: Pointer to table of strip drawers ! 545: ; flStart: Flags for all lines ! 546: ! 547: local cPelsAfterThisBank: dword ; For bank switching ! 548: local cStripsInNextRun: dword ; For bank switching ! 549: local pptfxBufEnd: ptr ; Last point in pptfxBuf (inclusive) ! 550: local M0: dword ; Normalized x0 in device coords ! 551: local dM: dword ; Delta-x in device coords ! 552: local N0: dword ; Normalized y0 in device coords ! 553: local dN: dword ; Delta-y in device coords ! 554: local fl: dword ; Flags for current line ! 555: local x: dword ; Normalized start pixel x-coord ! 556: local y: dword ; Normalized start pixel y-coord ! 557: local eqGamma_lo: dword ; Lower 32 bits of Gamma ! 558: local eqGamma_hi: dword ; Upper 32 bits of Gamma ! 559: local x0: dword ; Start pixel x-offset ! 560: local y0: dword ; Start pixel y-offset ! 561: local ulSlopeOneAdjustment: dword ; Special offset if line of slope 1 ! 562: local cStylePels: dword ; # of pixels in line (before clip) ! 563: local xStart: dword ; Start pixel x-offset before clip ! 564: local pfn: ptr ; Pointer to strip drawing function ! 565: local cPels: dword ; # pixels to be drawn (after clip) ! 566: local i: dword ; # pixels in strip ! 567: local r: dword ; Remainder (or "error") term ! 568: local d_I: dword ; Delta-I ! 569: local d_R: dword ; Delta-R ! 570: local plStripEnd: ptr ; End of strip array (ST_alStrips + STRIP_MAX*4) ! 571: local ptlStart[size POINTL]: byte ; Unnormalized start coord ! 572: local dN_Original: dword ; dN before half-flip ! 573: local xClipLeft: dword ; Left side of clip rectangle ! 574: local xClipRight: dword ; Right side of clip rectangle ! 575: local strip[size STRIPS]: byte ; Our strip buffer ! 
576: ! 577: ; Do some initializing: ! 578: ! 579: mov esi, pls ! 580: mov ecx, cptfx ! 581: mov edx, pptfxBuf ! 582: lea eax, [edx + ecx * (size POINTL) - (size POINTL)] ! 583: mov pptfxBufEnd, eax ; pptfxBufEnd is inclusive of end point ! 584: ! 585: mov eax, [esi].LS_chAndXor ; copy chAndXor from LINESTATE to STRIPS ! 586: mov strip.ST_chAndXor, eax ; buffer ! 587: ! 588: mov eax, [edx].ptl_x ; Load up end point (M1, N1) ! 589: mov edi, [edx].ptl_y ! 590: ! 591: mov edx, pptfxFirst ; Load up start point (M0, N0) ! 592: mov esi, [edx].ptl_x ! 593: mov ecx, [edx].ptl_y ! 594: ! 595: mov ebx, flStart ! 596: ! 597: ;-----------------------------------------------------------------------; ! 598: ; Flip to the first octant. ; ! 599: ;-----------------------------------------------------------------------; ! 600: ! 601: ; Register state: esi = M0 ! 602: ; ecx = N0 ! 603: ; eax = dM (M1) ! 604: ; edi = dN (N1) ! 605: ; ebx = fl ! 606: ! 607: ; Make sure we go left to right: ! 608: ! 609: public the_main_loop ! 610: the_main_loop: ! 611: cmp esi, eax ! 612: jle short is_left_to_right ; skip if M0 <= M1 ! 613: xchg esi, eax ; swap M0, M1 ! 614: xchg ecx, edi ; swap N0, N1 ! 615: or ebx, FL_FLIP_H ! 616: ! 617: is_left_to_right: ! 618: ! 619: ; Compute the deltas, remembering that the DDI says we should get ! 620: ; deltas less than 2^31. If we get more, we ensure we don't crash ! 621: ; later on by simply skipping the line: ! 622: ! 623: sub eax, esi ; eax = dM ! 624: jo next_line ; dM must be less than 2^31 ! 625: sub edi, ecx ; edi = dN ! 626: jo next_line ; dN must be less than 2^31 ! 627: ! 628: jge short is_top_to_bottom ; skip if dN >= 0 ! 629: neg ecx ; N0 = -N0 ! 630: neg edi ; N1 = -N1 ! 631: or ebx, FL_FLIP_V ! 632: ! 633: is_top_to_bottom: ! 634: cmp edi, eax ! 635: jb short done_flips ; skip if dN < dM ! 636: jne short slope_more_than_one ! 637: ! 638: ; We must special case slopes of one (because of our rounding convention): ! 639: ! 
640: or ebx, FL_FLIP_SLOPE_ONE ! 641: jmp short done_flips ! 642: ! 643: slope_more_than_one: ! 644: xchg eax, edi ; swap dM, dN ! 645: xchg esi, ecx ; swap M0, N0 ! 646: or ebx, FL_FLIP_D ! 647: ! 648: done_flips: ! 649: ! 650: mov edx, ebx ! 651: and edx, FL_ROUND_MASK ! 652: .errnz FL_ROUND_SHIFT - 2 ! 653: or ebx, [gaflRoundTable + edx] ; get our rounding flags ! 654: ! 655: mov dM, eax ; save some info ! 656: mov dN, edi ! 657: mov fl, ebx ! 658: ! 659: ; We're going to shift our origin so that it's at the closest integer ! 660: ; coordinate to the left/above our fractional start point (it makes ! 661: ; the math quicker): ! 662: ! 663: mov edx, esi ; x = LFLOOR(M0) ! 664: sar edx, FLOG2 ! 665: mov x, edx ! 666: ! 667: mov edx, ecx ; y = LFLOOR(N0) ! 668: sar edx, FLOG2 ! 669: mov y, edx ! 670: ! 671: ;-----------------------------------------------------------------------; ! 672: ; Compute the fractional remainder term ; ! 673: ;-----------------------------------------------------------------------; ! 674: ! 675: ; By shifting the origin we've contrived to eliminate the integer ! 676: ; portion of our fractional start point, giving us start point ! 677: ; fractional coordinates in the range [0, F - 1]: ! 678: ! 679: and esi, F - 1 ; M0 = FXFRAC(M0) ! 680: and ecx, F - 1 ; N0 = FXFRAC(N0) ! 681: ! 682: ; We now compute Gamma: ! 683: ! 684: mov M0, esi ; save M0, N0 for later ! 685: mov N0, ecx ! 686: ! 687: lea edx, [ecx + F/2] ! 688: mul edx ; [edx:eax] = dM * (N0 + F/2) ! 689: xchg eax, edi ! 690: mov ecx, edx ; [ecx:edi] = dM * (N0 + F/2) ! 691: ; (we just nuked N0) ! 692: ! 693: mul esi ; [edx:eax] = dN * M0 ! 694: ! 695: ; Now gamma = dM * (N0 + F/2) - dN * M0 - bRoundDown ! 696: ! 697: .errnz FL_V_ROUND_DOWN - 8000h ! 698: ror bh, 8 ! 699: sbb edi, eax ! 700: sbb ecx, edx ! 701: ! 702: shrd edi, ecx, FLOG2 ! 703: sar ecx, FLOG2 ; gamma = [ecx:edi] >>= 4 ! 704: ! 705: mov eqGamma_hi, ecx ! 706: mov eqGamma_lo, edi ! 707: ! 708: mov eax, N0 ! 709: ! 
710: ; Register state: ! 711: ; eax = N0 ! 712: ; ebx = fl ! 713: ; ecx = eqGamma_hi ! 714: ; edx = garbage ! 715: ; esi = M0 ! 716: ; edi = eqGamma_lo ! 717: ! 718: testb ebx, FL_FLIP_H ! 719: jnz line_runs_right_to_left ! 720: ! 721: ;-----------------------------------------------------------------------; ! 722: ; Figure out which pixels are at the ends of a left-to-right line. ; ! 723: ; --------> ; ! 724: ;-----------------------------------------------------------------------; ! 725: ! 726: public line_runs_left_to_right ! 727: line_runs_left_to_right: ! 728: or esi, esi ! 729: jz short LtoR_check_slope_one ! 730: ; skip ahead if M0 == 0 ! 731: ; (in that case, x0 = 0 which is to be ! 732: ; kept in esi, and is already ! 733: ; conventiently zero) ! 734: ! 735: or eax, eax ! 736: jnz short LtoR_N0_not_zero ! 737: ! 738: .errnz FL_H_ROUND_DOWN - 80h ! 739: ror bl, 8 ! 740: sbb esi, -F/2 ! 741: shr esi, FLOG2 ! 742: jmp short LtoR_check_slope_one ! 743: ; esi = x0 = rounded M0 ! 744: ! 745: LtoR_N0_not_zero: ! 746: sub eax, F/2 ! 747: sbb edx, edx ! 748: xor eax, edx ! 749: sub eax, edx ! 750: cmp esi, eax ! 751: sbb esi, esi ! 752: inc esi ; esi = x0 = (abs(N0 - F/2) <= M0) ! 753: ! 754: public LtoR_check_slope_one ! 755: LtoR_check_slope_one: ! 756: mov ulSlopeOneAdjustment, 0 ! 757: mov eax, ebx ! 758: and eax, FL_FLIP_SLOPE_ONE + FL_H_ROUND_DOWN ! 759: cmp eax, FL_FLIP_SLOPE_ONE + FL_H_ROUND_DOWN ! 760: jne short LtoR_compute_y0_from_x0 ! 761: ! 762: ; We have to special case lines that are exactly of slope 1 or -1: ! 763: ! 764: mov eax, N0 ! 765: add eax, dN ! 766: and eax, F - 1 ; eax = N1 ! 767: jz short LtoR_slope_one_check_start_point ! 768: ! 769: mov edx, M0 ! 770: add edx, dM ! 771: and edx, F - 1 ; edx = M1 ! 772: ! 773: add eax, F/2 ! 774: cmp edx, eax ; cmp M1, N1 + F/2 ! 775: jne short LtoR_slope_one_check_start_point ! 776: mov ulSlopeOneAdjustment, -1 ! 777: ! 778: LtoR_slope_one_check_start_point: ! 779: mov eax, M0 ! 780: or eax, eax ! 
781: jz short LtoR_compute_y0_from_x0 ! 782: ! 783: add eax, F/2 ! 784: cmp eax, N0 ; cmp M0 + 8, N0 ! 785: jne short LtoR_compute_y0_from_x0 ! 786: ! 787: xor esi, esi ; x0 = 0 ! 788: ! 789: LtoR_compute_y0_from_x0: ! 790: ! 791: ; ecx = eqGamma_hi ! 792: ; esi = x0 ! 793: ; edi = eqGamma_lo ! 794: ! 795: mov eax, dN ! 796: mov edx, dM ! 797: ! 798: mov x0, esi ! 799: mov y0, 0 ! 800: cmp ecx, 0 ! 801: jl short LtoR_compute_x1 ! 802: ! 803: neg esi ! 804: and esi, eax ! 805: sub edx, esi ! 806: cmp edi, edx ! 807: mov edx, dM ! 808: jl short LtoR_compute_x1 ! 809: mov y0, 1 ; y0 = floor((dN * x0 + eqGamma) / dM) ! 810: ! 811: LtoR_compute_x1: ! 812: ! 813: ; Register state: ! 814: ; eax = dN ! 815: ; ebx = fl ! 816: ; ecx = garbage ! 817: ; edx = dM ! 818: ; esi = garbage ! 819: ; edi = garbage ! 820: ! 821: mov esi, M0 ! 822: add esi, edx ! 823: mov ecx, esi ! 824: shr esi, FLOG2 ! 825: dec esi ; x1 = ((M0 + dM) >> 4) - 1 ! 826: add esi, ulSlopeOneAdjustment ! 827: and ecx, F-1 ; M1 = (M0 + dM) & 15 ! 828: jz done_first_pel_last_pel ! 829: ! 830: add eax, N0 ! 831: and eax, F-1 ; N1 = (N0 + dN) & 15 ! 832: jnz short LtoR_N1_not_zero ! 833: ! 834: .errnz FL_H_ROUND_DOWN - 80h ! 835: ror bl, 8 ! 836: sbb ecx, -F/2 ! 837: shr ecx, FLOG2 ; ecx = LROUND(M1, fl & FL_ROUND_DOWN) ! 838: add esi, ecx ! 839: jmp done_first_pel_last_pel ! 840: ! 841: LtoR_N1_not_zero: ! 842: sub eax, F/2 ! 843: sbb edx, edx ! 844: xor eax, edx ! 845: sub eax, edx ! 846: cmp eax, ecx ! 847: jg done_first_pel_last_pel ! 848: inc esi ! 849: jmp done_first_pel_last_pel ! 850: ! 851: ;-----------------------------------------------------------------------; ! 852: ; Figure out which pixels are at the ends of a right-to-left line. ; ! 853: ; <-------- ; ! 854: ;-----------------------------------------------------------------------; ! 855: ! 856: ; Compute x0: ! 857: ! 858: public line_runs_right_to_left ! 859: line_runs_right_to_left: ! 860: mov x0, 1 ; x0 = 1 ! 861: or eax, eax ! 
862: jnz short RtoL_N0_not_zero ! 863: ! 864: xor edx, edx ; ulDelta = 0 ! 865: .errnz FL_H_ROUND_DOWN - 80h ! 866: ror bl, 8 ! 867: sbb esi, -F/2 ! 868: shr esi, FLOG2 ; esi = LROUND(M0, fl & FL_H_ROUND_DOWN) ! 869: jz short RtoL_check_slope_one ! 870: ! 871: mov x0, 2 ! 872: mov edx, dN ! 873: jmp short RtoL_check_slope_one ! 874: ! 875: RtoL_N0_not_zero: ! 876: sub eax, F/2 ! 877: sbb edx, edx ! 878: xor eax, edx ! 879: sub eax, edx ! 880: add eax, esi ; eax = ABS(N0 - F/2) + M0 ! 881: xor edx, edx ; ulDelta = 0 ! 882: cmp eax, F ! 883: jle short RtoL_check_slope_one ! 884: ! 885: mov x0, 2 ; x0 = 2 ! 886: mov edx, dN ; ulDelta = dN ! 887: ! 888: public RtoL_check_slope_one ! 889: RtoL_check_slope_one: ! 890: mov ulSlopeOneAdjustment, 0 ! 891: mov eax, ebx ! 892: and eax, FL_FLIP_SLOPE_ONE + FL_H_ROUND_DOWN ! 893: cmp eax, FL_FLIP_SLOPE_ONE ! 894: jne short RtoL_compute_y0_from_x0 ! 895: ! 896: ; We have to special case lines that are exactly of slope 1 or -1: ! 897: ! 898: mov eax, N0 ! 899: add eax, dN ! 900: and eax, F - 1 ; eax = N1 ! 901: jz short RtoL_slope_one_check_start_point ! 902: ! 903: mov esi, M0 ! 904: add esi, dM ! 905: and esi, F - 1 ; esi = M1 ! 906: ! 907: add eax, F/2 ! 908: cmp esi, eax ; cmp M1, N1 + F/2 ! 909: jne short RtoL_slope_one_check_start_point ! 910: mov ulSlopeOneAdjustment, 1 ! 911: ! 912: RtoL_slope_one_check_start_point: ! 913: mov eax, M0 ! 914: or eax, eax ! 915: jz short RtoL_compute_y0_from_x0 ! 916: ! 917: add eax, F/2 ! 918: cmp eax, N0 ; cmp M0 + 8, N0 ! 919: jne short RtoL_compute_y0_from_x0 ! 920: ! 921: mov x0, 2 ; x0 = 2 ! 922: mov edx, dN ; ulDelta = dN ! 923: ! 924: RtoL_compute_y0_from_x0: ! 925: ! 926: ; eax = garbage ! 927: ; ebx = fl ! 928: ; ecx = eqGamma_hi ! 929: ; edx = ulDelta ! 930: ; esi = garbage ! 931: ; edi = eqGamma_lo ! 932: ! 933: mov eax, dN ; eax = dN ! 934: mov y0, 0 ; y0 = 0 ! 935: ! 936: add edi, edx ! 937: adc ecx, 0 ; eqGamma += ulDelta ! 938: ; NOTE: Setting flags here! ! 
939: mov edx, dM ; edx = dM ! 940: jl short RtoL_compute_x1 ; NOTE: Looking at the flags here! ! 941: jg short RtoL_y0_is_2 ! 942: ! 943: lea ecx, [edx + edx] ! 944: sub ecx, eax ; ecx = 2 * dM - dN ! 945: cmp edi, ecx ! 946: jge short RtoL_y0_is_2 ! 947: ! 948: sub ecx, edx ; ecx = dM - dN ! 949: cmp edi, ecx ! 950: jl short RtoL_compute_x1 ! 951: ! 952: mov y0, 1 ! 953: jmp short RtoL_compute_x1 ! 954: ! 955: RtoL_y0_is_2: ! 956: mov y0, 2 ! 957: ! 958: RtoL_compute_x1: ! 959: ! 960: ; Register state: ! 961: ; eax = dN ! 962: ; ebx = fl ! 963: ; ecx = garbage ! 964: ; edx = dM ! 965: ; esi = garbage ! 966: ; edi = garbage ! 967: ! 968: mov esi, M0 ! 969: add esi, edx ! 970: mov ecx, esi ! 971: shr esi, FLOG2 ; x1 = (M0 + dM) >> 4 ! 972: add esi, ulSlopeOneAdjustment ! 973: and ecx, F-1 ; M1 = (M0 + dM) & 15 ! 974: ! 975: add eax, N0 ! 976: and eax, F-1 ; N1 = (N0 + dN) & 15 ! 977: jnz short RtoL_N1_not_zero ! 978: ! 979: .errnz FL_H_ROUND_DOWN - 80h ! 980: ror bl, 8 ! 981: sbb ecx, -F/2 ! 982: shr ecx, FLOG2 ; ecx = LROUND(M1, fl & FL_ROUND_DOWN) ! 983: add esi, ecx ! 984: jmp done_first_pel_last_pel ! 985: ! 986: RtoL_N1_not_zero: ! 987: sub eax, F/2 ! 988: sbb edx, edx ! 989: xor eax, edx ! 990: sub eax, edx ! 991: add eax, ecx ; eax = ABS(N1 - F/2) + M1 ! 992: cmp eax, F+1 ! 993: sbb esi, -1 ! 994: ! 995: done_first_pel_last_pel: ! 996: ! 997: ; Register state: ! 998: ; eax = garbage ! 999: ; ebx = fl ! 1000: ; ecx = garbage ! 1001: ; edx = garbage ! 1002: ; esi = x1 ! 1003: ; edi = garbage ! 1004: ! 1005: mov ecx, x0 ! 1006: lea edx, [esi + 1] ! 1007: sub edx, ecx ; edx = x1 - x0 + 1 ! 1008: ! 1009: jle next_line ! 1010: mov cStylePels, edx ! 1011: mov xStart, ecx ! 1012: ! 1013: ;-----------------------------------------------------------------------; ! 1014: ; See if clipping or styling needs to be done. ; ! 1015: ;-----------------------------------------------------------------------; ! 1016: ! 1017: testb ebx, FL_CLIP ! 1018: jnz do_some_clipping ! 
1019: ! 1020: ; Register state: ! 1021: ; eax = garbage ! 1022: ; ebx = fl ! 1023: ; ecx = x0 (stack variable correct too) ! 1024: ; edx = garbage ! 1025: ; esi = x1 ! 1026: ; edi = garbage ! 1027: ! 1028: done_clipping: ! 1029: mov eax, y0 ! 1030: ! 1031: sub esi, ecx ! 1032: inc esi ; esi = cPels = x1 - x0 + 1 ! 1033: mov cPels, esi ! 1034: ! 1035: mov esi, ppdev ! 1036: add ecx, x ; ecx = ptlStart.ptl_x ! 1037: add eax, y ; eax = ptlStart.ptl_y ! 1038: ! 1039: mov esi, [esi].pdev_lNextScan ; we'll compute the sign of lNextScan ! 1040: ! 1041: testb ebx, FL_FLIP_D ! 1042: jz short do_v_unflip ! 1043: xchg ecx, eax ! 1044: ! 1045: do_v_unflip: ! 1046: testb ebx, FL_FLIP_V ! 1047: jz short done_unflips ! 1048: neg eax ! 1049: neg esi ! 1050: ! 1051: done_unflips: ! 1052: mov strip.ST_lNextScan, esi ; lNextScan now right for y-direction ! 1053: testb ebx, FL_STYLED ! 1054: jnz do_some_styling ! 1055: ! 1056: done_styling: ! 1057: lea edx, [strip.ST_alStrips + (STRIP_MAX * 4)] ! 1058: mov plStripEnd, edx ! 1059: ! 1060: mov cPelsAfterThisBank, 0 ! 1061: mov cStripsInNextRun, 7fffffffh ! 1062: ! 1063: ;-----------------------------------------------------------------------; ! 1064: ; Do banking setup. ; ! 1065: ;-----------------------------------------------------------------------; ! 1066: ! 1067: public bank_setup ! 1068: bank_setup: ! 1069: ! 1070: ; Register state: ! 1071: ; eax = ptlStart.ptl_y ! 1072: ; ebx = fl ! 1073: ; ecx = ptlStart.ptl_x ! 1074: ; edx = garbage ! 1075: ; esi = garbage ! 1076: ; edi = garbage ! 1077: ! 1078: mov esi, ppdev ! 1079: cmp eax, [esi].pdev_rcl1WindowClip.yTop ! 1080: jl short bank_get_initial_bank ; ptlStart.y < rcl1WindowClip.yTop ! 1081: ! 1082: cmp eax, [esi].pdev_rcl1WindowClip.yBottom ! 1083: jl short bank_got_initial_bank ; ptlStart.y < rcl1WindowClip.yBot ! 1084: ! 1085: bank_get_initial_bank: ! 1086: mov ptlStart.ptl_y, eax ; Save ptlStart.ptl_y ! 1087: mov edi, ecx ; Save ptlStart.ptl_x ! 1088: ! 
1089: .errnz JustifyTop ! 1090: .errnz JustifyBottom - 1 ! 1091: .errnz FL_FLIP_V - 8 ! 1092: ! 1093: mov ecx, ebx ; JustifyTop if line goes down, ! 1094: shr ecx, 3 ; JustifyBottom if line goes up ! 1095: and ecx, 1 ! 1096: ! 1097: bank_justified: ! 1098: ptrCall <dword ptr [esi].pdev_pfnBankControl>, \ ! 1099: <esi, eax, ecx> ! 1100: ! 1101: mov eax, ptlStart.ptl_y ! 1102: mov ecx, edi ! 1103: ! 1104: bank_got_initial_bank: ! 1105: testb ebx, FL_FLIP_D ! 1106: jz short bank_major_x ! 1107: ! 1108: bank_major_y: ! 1109: testb ebx, FL_FLIP_V ! 1110: jz short bank_major_y_down ! 1111: bank_major_y_up: ! 1112: lea edi, [eax + 1] ! 1113: sub edi, [esi].pdev_rcl1WindowClip.yTop ! 1114: jmp short bank_done_y_major ! 1115: bank_major_y_down: ! 1116: mov edi, [esi].pdev_rcl1WindowClip.yBottom ! 1117: sub edi, eax ! 1118: bank_done_y_major: ! 1119: mov esi, cPels ! 1120: sub esi, edi ; edi = cPelsInBank ! 1121: mov cPelsAfterThisBank, esi ! 1122: jle short done_bank_setup ! 1123: mov cPels, edi ! 1124: jmp short done_bank_setup ! 1125: ! 1126: bank_major_x: ! 1127: mov edi, dN ! 1128: shr edi, FLOG2 ! 1129: add edi, y ! 1130: ! 1131: ; We're guessing at the y-position of the end pixel (it's too much work ! 1132: ; to compute the actual value) to see if the line spans more than one ! 1133: ; bank. We have to add at least a slop value of '3' because the actual ! 1134: ; start pixel may be may 2 off from 'y' because of end-pixel exclusiveness, ! 1135: ; and we have to add 1 more because we're taking the floor of (dN / F), to ! 1136: ; account for rounding: ! 1137: ! 1138: add edi, 3 ; yEnd = edi = y + LFLOOR(dN) + 3 ! 1139: testb ebx, FL_FLIP_V ! 1140: jz short bank_major_x_down ! 1141: bank_major_x_up: ! 1142: mov edx, 1 ! 1143: sub edx, [esi].pdev_rcl1WindowClip.yTop ; edx = -yNextBankStart ! 1144: ! 1145: cmp edi, edx ! 1146: lea edx, [edx + eax] ; edx = cStripsInNextRun ! 1147: jl short bank_major_x_done ! 1148: ! 
1149: ; Line may go over bank boundary, so don't do a half flip: ! 1150: ! 1151: or ebx, FL_DONT_DO_HALF_FLIP ! 1152: jmp short bank_major_x_done ! 1153: ! 1154: bank_major_x_down: ! 1155: mov esi, [esi].pdev_rcl1WindowClip.yBottom ; esi = yNextBankStart ! 1156: ! 1157: mov edx, esi ! 1158: sub edx, eax ; edx = cStripsInNextRun ! 1159: ! 1160: cmp edi, esi ! 1161: jl short bank_major_x_done ! 1162: or ebx, FL_DONT_DO_HALF_FLIP ! 1163: ! 1164: bank_major_x_done: ! 1165: sub edx, STRIP_MAX ! 1166: mov cStripsInNextRun, edx ! 1167: jge short done_bank_setup ! 1168: ! 1169: lea edx, [strip.ST_alStrips + edx * 4 + (STRIP_MAX * 4)] ! 1170: mov plStripEnd, edx ! 1171: ! 1172: done_bank_setup: ! 1173: ! 1174: ;-----------------------------------------------------------------------; ! 1175: ; Setup to do DDA. ; ! 1176: ;-----------------------------------------------------------------------; ! 1177: ! 1178: ; Register state: ! 1179: ; eax = ptlStart.ptl_y ! 1180: ; ebx = fl ! 1181: ; ecx = ptlStart.ptl_x ! 1182: ; edx = garbage ! 1183: ; esi = garbage ! 1184: ; edi = garbage ! 1185: ! 1186: mov esi, ppdev ! 1187: mov edi, eax ; Now edi = ptlStart.ptl_y ! 1188: imul [esi].pdev_lNextScan ! 1189: add eax, [esi].pdev_pvBitmapStart ! 1190: add eax, ecx ! 1191: mov strip.ST_pjScreen, eax ; pjScreen = pchBits + ptlStart.y * ! 1192: ; cjDelta + ptlStart.x ! 1193: ! 1194: mov eax, dM ! 1195: mov ecx, dN ! 1196: mov esi, eqGamma_lo ! 1197: mov edi, eqGamma_hi ! 1198: ! 1199: ; Register state: ! 1200: ; eax = dM ! 1201: ; ebx = fl ! 1202: ; ecx = dN ! 1203: ; edx = garbage ! 1204: ; esi = eqGamma_lo ! 1205: ; edi = eqGamma_hi ! 1206: ! 1207: lea edx, [ecx + ecx] ; if (2 * dN > dM) ! 1208: cmp edx, eax ! 1209: mov edx, y0 ; Load y0 again ! 1210: jbe short after_half_flip ! 1211: ! 1212: test ebx, FL_DONT_DO_HALF_FLIP ! 1213: jnz short after_half_flip ! 1214: ! 1215: or ebx, FL_FLIP_HALF ! 1216: mov fl, ebx ! 1217: ! 1218: ; Do a half flip! ! 1219: ! 1220: not esi ! 1221: not edi ! 
1222: add esi, eax ! 1223: adc edi, 0 ; eqGamma = -eqGamma - 1 + dM ! 1224: ! 1225: neg ecx ! 1226: add ecx, eax ; dN = dM - dN ! 1227: ! 1228: neg edx ! 1229: add edx, x0 ; y0 = x0 - y0 ! 1230: ! 1231: after_half_flip: ! 1232: mov strip.ST_flFlips, ebx ! 1233: and ebx, FL_STRIP_MASK ! 1234: ! 1235: .errnz FL_STRIP_SHIFT ! 1236: mov eax, apfn ! 1237: lea eax, [eax + ebx * 4] ! 1238: mov eax, [eax] ! 1239: mov pfn, eax ! 1240: mov eax, dM ! 1241: ! 1242: ; Register state: ! 1243: ; eax = dM ! 1244: ; ebx = garbage ! 1245: ; ecx = dN ! 1246: ; edx = y0 ! 1247: ; esi = eqGamma_lo ! 1248: ; edi = eqGamma_hi ! 1249: ! 1250: or ecx, ecx ! 1251: jz short zero_slope ! 1252: ! 1253: compute_dda_stuff: ! 1254: inc edx ! 1255: mul edx ! 1256: stc ; set the carry to accomplish -1 ! 1257: sbb eax, esi ! 1258: sbb edx, edi ; (y0 + 1) * dM - eqGamma - 1 ! 1259: div ecx ! 1260: ! 1261: mov esi, eax ; esi = i ! 1262: mov edi, edx ; edi = r ! 1263: ! 1264: xor edx, edx ! 1265: mov eax, dM ! 1266: div ecx ; edx = d_R, eax = d_I ! 1267: mov d_I, eax ! 1268: ! 1269: sub esi, x0 ! 1270: inc esi ! 1271: ! 1272: done_dda_stuff: ! 1273: lea eax, [strip.ST_alStrips] ! 1274: mov ebx, cPels ! 1275: ! 1276: ;-----------------------------------------------------------------------; ! 1277: ; Do our main DDA loop. ; ! 1278: ;-----------------------------------------------------------------------; ! 1279: ! 1280: sub edi, ecx ; offset remainder term from [0..dN) ! 1281: ; to [-dN..0) so test in inner ! 1282: ; loop is quicker ! 1283: align 4 ! 1284: ! 1285: ; Register state: ! 1286: ; eax = plStrip ; current pointer into strip array ! 1287: ; ebx = cPels ; total number of pels in line ! 1288: ; ecx = dN ; delta-N = rise in line ! 1289: ; edx = d_R ; d_I + d_R/dN = exact strip length ! 1290: ; esi = i ; length of current strip ! 1291: ; edi = r ; remainder term for current strip ! 1292: ; ; in range [-dN..0) ! 1293: ! 1294: public dda_loop ! 1295: dda_loop: ! 
1296: sub ebx, esi ; subtract strip length from line length ! 1297: jle final_strip ; if negative, done with line ! 1298: ! 1299: mov [eax], esi ; write strip length to strip array ! 1300: add eax, 4 ! 1301: cmp plStripEnd, eax ; is the strip array buffer full? ! 1302: jbe short output_strips ; if so, empty it ! 1303: ! 1304: ; The output_strips routine jumps to here when done: ! 1305: ! 1306: done_output_strips: ! 1307: mov esi, d_I ; our normal strip length ! 1308: add edi, edx ; adjust our remainder term ! 1309: jl short dda_loop ! 1310: ! 1311: sub edi, ecx ; our remainder became 1 or more, so ! 1312: inc esi ; we increment this strip length ! 1313: ; and adjust the remainder term ! 1314: ! 1315: ; We've unrolled our loop a bit, so this should look familiar to the above: ! 1316: ! 1317: sub ebx, esi ; subtract strip length from line length ! 1318: jle final_strip ; if negative, done with line ! 1319: ! 1320: mov [eax], esi ; write strip length to strip array ! 1321: add eax, 4 ; adjust strip pointer ! 1322: ! 1323: ; Note that banking requires us to check if the strip array is full here ! 1324: ; too (and note that if output_strips is called it will return to ! 1325: ; done_output_strips): ! 1326: ! 1327: cmp plStripEnd, eax ! 1328: jbe short output_strips ! 1329: ! 1330: mov esi, d_I ; our normal strip length ! 1331: add edi, edx ; adjust our remainder term ! 1332: jl short dda_loop ! 1333: ! 1334: sub edi, ecx ; our remainder became 1 or more, so ! 1335: inc esi ; adjust ! 1336: jmp short dda_loop ! 1337: ! 1338: zero_slope: ! 1339: mov esi, 7fffffffh ! 1340: jmp short done_dda_stuff ! 1341: ! 1342: ;-----------------------------------------------------------------------; ! 1343: ; Empty strips buffer & possibly do x-major bank switch. ; ! 1344: ;-----------------------------------------------------------------------; ! 1345: ! 1346: output_strips: ! 1347: mov d_R, edx ! 1348: mov cPels, ebx ! 1349: mov i, esi ! 1350: mov r, edi ! 1351: mov dN, ecx ! 1352: ! 
1353: lea edx, [strip] ! 1354: mov ecx, pls ! 1355: ! 1356: ; Call our strip routine: ! 1357: ! 1358: ptrCall <dword ptr pfn>, \ ! 1359: <edx, ecx, eax> ! 1360: ! 1361: ; It may be that we ran out of run in our strips buffer, and don't ! 1362: ; actually have to switch banks. See if that's the case: ! 1363: ! 1364: mov eax, cStripsInNextRun ! 1365: or eax, eax ! 1366: jg short done_strip_bank_switch ! 1367: ! 1368: ; We have to switch banks. See if we're going up or down: ! 1369: ! 1370: mov esi, ppdev ! 1371: test fl, FL_FLIP_V ! 1372: jz short bank_x_down ! 1373: ! 1374: bank_x_up: ! 1375: mov edi, strip.ST_pjScreen ! 1376: sub edi, [esi].pdev_pvBitmapStart ! 1377: mov ebx, [esi].pdev_rcl1WindowClip.yTop ! 1378: dec ebx ; we want yTop - 1 to be mapped in ! 1379: ! 1380: ; Map in the next higher bank: ! 1381: ! 1382: ptrCall <dword ptr [esi].pdev_pfnBankControl>, \ ! 1383: <esi, ebx, JustifyBottom>; ebx, esi and edi are preserved ! 1384: ! 1385: lea eax, [ebx + 1] ! 1386: sub eax, [esi].pdev_rcl1WindowClip.yTop ! 1387: ; eax = # of scans can do in bank ! 1388: ! 1389: add edi, [esi].pdev_pvBitmapStart ! 1390: mov strip.ST_pjScreen, edi ! 1391: ! 1392: jmp short done_strip_bank_switch ! 1393: ! 1394: bank_x_down: ! 1395: mov edi, strip.ST_pjScreen ! 1396: sub edi, [esi].pdev_pvBitmapStart ! 1397: mov ebx, [esi].pdev_rcl1WindowClip.yBottom ! 1398: ! 1399: ; Map in the next lower bank: ! 1400: ! 1401: ptrCall <dword ptr [esi].pdev_pfnBankControl>, \ ! 1402: <esi, ebx, JustifyTop> ; ebx, esi and edi are preserved ! 1403: ! 1404: mov eax, [esi].pdev_rcl1WindowClip.yBottom ! 1405: sub eax, ebx ; eax = # scans can do in bank ! 1406: ! 1407: add edi, [esi].pdev_pvBitmapStart ! 1408: mov strip.ST_pjScreen,edi ! 1409: ! 1410: done_strip_bank_switch: ! 1411: ! 1412: ; eax = cStripsInNextRun ! 1413: ! 1414: lea edx, [strip.ST_alStrips + (STRIP_MAX * 4)] ! 1415: sub eax, STRIP_MAX ! 1416: mov cStripsInNextRun, eax ! 1417: jge short get_ready_for_more_strips ! 
1418: lea edx, [edx + eax * 4] ! 1419: ! 1420: get_ready_for_more_strips: ! 1421: mov plStripEnd, edx ! 1422: ! 1423: mov esi, i ! 1424: mov edi, r ! 1425: mov ebx, cPels ! 1426: mov edx, d_R ! 1427: mov ecx, dN ! 1428: lea eax, [strip.ST_alStrips] ! 1429: jmp done_output_strips ! 1430: ! 1431: ;-----------------------------------------------------------------------; ! 1432: ; Empty strips buffer. Either get new line or do y-major bank switch. ; ! 1433: ;-----------------------------------------------------------------------; ! 1434: ! 1435: final_strip: ! 1436: add ebx, esi ! 1437: mov [eax], ebx ! 1438: add eax, 4 ! 1439: ! 1440: cmp cPelsAfterThisBank, 0 ! 1441: jg short bank_y_major ! 1442: ! 1443: very_final_strip: ! 1444: lea edx, [strip] ! 1445: mov ecx, pls ! 1446: ! 1447: ptrCall <dword ptr pfn>, \ ! 1448: <edx, ecx, eax> ! 1449: ! 1450: ; NOTE: next_line is jumped to from various places, and it cannot assume ! 1451: ; any registers are loaded. ! 1452: ! 1453: next_line: ! 1454: mov ebx, flStart ! 1455: testb ebx, FL_COMPLEX_CLIP ! 1456: jnz short see_if_done_complex_clipping ! 1457: ! 1458: mov edx, pptfxBuf ! 1459: cmp edx, pptfxBufEnd ! 1460: je short all_done ! 1461: ! 1462: mov esi, [edx].ptl_x ! 1463: mov ecx, [edx].ptl_y ! 1464: add edx, size POINTL ! 1465: mov pptfxBuf, edx ! 1466: mov eax, [edx].ptl_x ! 1467: mov edi, [edx].ptl_y ! 1468: jmp the_main_loop ! 1469: ! 1470: all_done: ! 1471: mov eax, 1 ! 1472: ! 1473: cRet bLines ! 1474: ! 1475: see_if_done_complex_clipping: ! 1476: mov ebx, fl ! 1477: dec cptfx ! 1478: jz short all_done ! 1479: ! 1480: and ebx, NOT FL_FLIP_HALF ; Make sure the next run doesn't have ! 1481: mov fl, ebx ; to do a half-flip if it doesn't ! 1482: ; want to ! 1483: jmp continue_complex_clipping ! 1484: ! 1485: ;-----------------------------------------------------------------------; ! 1486: ; Switch banks for a y-major line. ; ! 1487: ;-----------------------------------------------------------------------; ! 1488: ! 
1489: public bank_y_major ! 1490: bank_y_major: ! 1491: mov d_R, edx ! 1492: mov i, esi ! 1493: mov r, edi ! 1494: mov dN, ecx ! 1495: sub ebx, esi ; Undo our offset ! 1496: ! 1497: bank_y_output_strips: ! 1498: lea edx, [strip] ! 1499: mov ecx, pls ! 1500: ! 1501: ptrCall <dword ptr pfn>, \ ! 1502: <edx, ecx, eax> ! 1503: ! 1504: mov esi, ppdev ! 1505: test fl, FL_FLIP_V ! 1506: jz short bank_y_down ! 1507: ! 1508: bank_y_up: ! 1509: mov edi, strip.ST_pjScreen ! 1510: sub edi, [esi].pdev_pvBitmapStart ! 1511: mov ecx, [esi].pdev_rcl1WindowClip.yTop ! 1512: push ecx ! 1513: dec ecx ; we want yTop - 1 to be mapped in ! 1514: ! 1515: ; Map in the next higher bank: ! 1516: ! 1517: ptrCall <dword ptr [esi].pdev_pfnBankControl>, \ ! 1518: <esi, ecx, JustifyBottom>; ebx, esi and edi are preserved ! 1519: ! 1520: pop ecx ! 1521: sub ecx, [esi].pdev_rcl1WindowClip.yTop ! 1522: ; ecx = # of scans can do in bank ! 1523: ! 1524: add edi, [esi].pdev_pvBitmapStart ! 1525: mov strip.ST_pjScreen, edi ! 1526: ! 1527: mov edx, cPelsAfterThisBank ; edx = cPelsAfterBank ! 1528: lea eax, [strip.ST_alStrips] ; eax = plStrip ! 1529: or ebx, ebx ; ebx = cPels ! 1530: jge bank_y_done_partial_strip ! 1531: jmp short bank_y_done_switch ! 1532: ! 1533: bank_y_down: ! 1534: mov edi, strip.ST_pjScreen ! 1535: sub edi, [esi].pdev_pvBitmapStart ! 1536: mov ecx, [esi].pdev_rcl1WindowClip.yBottom ! 1537: push ecx ! 1538: ! 1539: ; Map in the next lower bank: ! 1540: ! 1541: ptrCall <dword ptr [esi].pdev_pfnBankControl>, \ ! 1542: <esi, ecx, JustifyTop> ; ebx, esi and edi are preserved ! 1543: ! 1544: pop eax ! 1545: mov ecx, [esi].pdev_rcl1WindowClip.yBottom ! 1546: sub ecx, eax ; ecx = # scans can do in bank ! 1547: ! 1548: add edi, [esi].pdev_pvBitmapStart ! 1549: mov strip.ST_pjScreen,edi ! 1550: ! 1551: mov edx, cPelsAfterThisBank ; edx = cPelsAfterBank ! 1552: lea eax, [strip.ST_alStrips] ; eax = plStrip ! 1553: or ebx, ebx ; ebx = cPels ! 1554: jge short bank_y_done_partial_strip ! 1555: ! 
1556: bank_y_done_switch: ! 1557: ! 1558: ; Handle a single strip stretching over multiple banks: ! 1559: ! 1560: test fl, FL_FLIP_HALF ! 1561: jz short bank_y_no_half_flip ! 1562: ! 1563: ; We now have to adjust for the fact that the strip drawers always leave ! 1564: ; the state ready for the next new strip (e.g., if we're doing vertical ! 1565: ; strips, it advances pjScreen one to the right after drawing each strip). ! 1566: ; But the problem is that since we crossed a bank, we have to continue the ! 1567: ; *old* strip, so we have to undo that advance: ! 1568: ! 1569: bank_y_half_flip: ! 1570: inc strip.ST_pjScreen ! 1571: jmp short bank_y_done_bit_adjust ! 1572: ! 1573: bank_y_no_half_flip: ! 1574: dec strip.ST_pjScreen ! 1575: ! 1576: bank_y_done_bit_adjust: ! 1577: mov esi, ebx ! 1578: neg esi ; esi = # pels left in strip ! 1579: ! 1580: ; eax = pointer to first strip entry ! 1581: ; ebx = negative esi ! 1582: ; ecx = # of pels we can put down in this window ! 1583: ; edx = # of pels remaining to do in line ! 1584: ; esi = # of pels left in strip ! 1585: ! 1586: ; We have three special cases to check here: ! 1587: ; ! 1588: ; 1) If the strip spans the entire next window ! 1589: ; 2) This is the last strip in the line ! 1590: ; 3) Neither of the above ! 1591: ! 1592: cmp edx,ecx ;if line shorter than bank, ! 1593: jle short bank_y_check_if_last_strip; know strip doesn't span bank ! 1594: ! 1595: cmp esi,ecx ;if line spans bank, don't have ! 1596: jl short bank_y_continue_strip ; to check if last strip ! 1597: ! 1598: ; If ((# of pels in line > window size) && (# of pels in strip > window size)) ! 1599: ; then the strip spans this bank: ! 1600: ! 1601: mov [eax], ecx ! 1602: add eax, 4 ! 1603: add ebx, ecx ! 1604: sub edx, ecx ! 1605: mov cPelsAfterThisBank, edx ! 1606: jmp bank_y_output_strips ! 1607: ! 1608: bank_y_check_if_last_strip: ! 1609: cmp esi, edx ;if strip is shorter than line, ! 1610: jl short bank_y_continue_strip ; we know this isn't the last ! 
1611: ; strip ! 1612: ! 1613: ; Handle case where this is the last strip in the line and it overlaps a bank: ! 1614: ! 1615: mov [eax], edx ! 1616: add eax, 4 ! 1617: jmp very_final_strip ! 1618: ! 1619: bank_y_continue_strip: ! 1620: mov [eax], esi ! 1621: add eax, 4 ! 1622: ! 1623: bank_y_done_partial_strip: ! 1624: add ebx, edx ; cPels += cPelsAfterThisBank ! 1625: sub edx, ecx ; cPelsAfterThisBank -= cyWindow ! 1626: ! 1627: jle short bank_y_get_ready ! 1628: sub ebx, edx ! 1629: ! 1630: bank_y_get_ready: ! 1631: mov cPelsAfterThisBank, edx ! 1632: mov edi, r ! 1633: mov edx, d_R ! 1634: mov ecx, dN ! 1635: jmp done_output_strips ! 1636: ! 1637: ;---------------------------Private-Routine-----------------------------; ! 1638: ; do_some_styling ! 1639: ; ! 1640: ; Inputs: ! 1641: ; eax = ptlStart.ptl_y ! 1642: ; ebx = fl ! 1643: ; ecx = ptlStart.ptl_x ! 1644: ; Preserves: ! 1645: ; eax, ebx, ecx ! 1646: ; Output: ! 1647: ; Exits to done_styling, having written [pls].LS_spNext and the strip fields ST_pspStart, ST_pspEnd, ST_psp, ST_spRemaining and ST_bIsGap. ! 1648: ; ! 1649: ;-----------------------------------------------------------------------; ! 1650: ! 1651: public do_some_styling ! 1652: do_some_styling: ! 1653: mov esi, pls ; esi = pls for the rest of the routine ! 1654: mov ptlStart.ptl_x, ecx ; park ecx so it can be used as scratch (reloaded at done_arbitrary) ! 1655: ! 1656: mov edi, [esi].LS_spNext ; spThis ! 1657: mov edx, edi ! 1658: add edx, cStylePels ; spNext ! 1659: ! 1660: do_non_alternate_style: ! 1661: ! 1662: ; For styles, we don't bother to keep the style position normalized. ! 1663: ; (we do ensure that it's positive, though). If a figure is over 2 ! 1664: ; billion pels long, we'll be a pel off in our style state (oops!). ! 1665: ! 1666: and edx, 7fffffffh ; clear the sign bit so the position stays positive ! 1667: mov [esi].LS_spNext, edx ! 1668: mov ptlStart.ptl_y, eax ; park eax as well (reloaded at done_arbitrary) ! 1669: ! 1670: testb ebx, FL_FLIP_H ! 1671: jz short arbitrary_left_to_right ! 1672: ! 1673: sub edx, x0 ! 1674: add edx, xStart ; edx = spNext - x0 + xStart ! 1675: mov eax, edx ! 1676: xor edx, edx ; zero the high half: div takes edx:eax as the dividend ! 1677: div [esi].LS_spTotal ; eax = quotient, edx = remainder ! 1678: ! 1679: neg edx ; negate the remainder ! 1680: jge short continue_right_to_left ; a remainder of 0 negates to 0 and takes this branch ! 1681: add edx, [esi].LS_spTotal ; edx = spTotal - remainder ! 1682: not eax ; complement the quotient (only its low bit is examined at compute_arbitrary_stuff) ! 1683: ! 
1684: continue_right_to_left: ! 1685: mov edi, dword ptr [esi].LS_bStartIsGap ! 1686: not edi ! 1687: mov ecx, [esi].LS_aspRtoL ! 1688: jmp short compute_arbitrary_stuff ! 1689: ! 1690: arbitrary_left_to_right: ! 1691: add edi, x0 ! 1692: sub edi, xStart ! 1693: mov eax, edi ! 1694: xor edx, edx ! 1695: div [esi].LS_spTotal ! 1696: mov edi, dword ptr [esi].LS_bStartIsGap ! 1697: mov ecx, [esi].LS_aspLtoR ! 1698: ! 1699: compute_arbitrary_stuff: ! 1700: ; eax = sp / spTotal ! 1701: ; ebx = fl ! 1702: ; ecx = pspStart ! 1703: ; edx = sp % spTotal ! 1704: ; esi = pls ! 1705: ; edi = bIsGap ! 1706: ! 1707: and eax, [esi].LS_cStyle ; if odd length style and second run ! 1708: and al, 1 ; through style array, flip the ! 1709: jz short odd_style_array_done ; meaning of the elements ! 1710: not edi ; flip bIsGap ! 1711: ! 1712: odd_style_array_done: ! 1713: mov eax, [esi].LS_cStyle ! 1714: mov strip.ST_pspStart, ecx ! 1715: lea eax, [ecx + eax * 4 - 4] ; eax = pspStart + (cStyle - 1) * 4, the last dword entry ! 1716: mov strip.ST_pspEnd, eax ! 1717: ! 1718: find_psp: ! 1719: sub edx, [ecx] ; subtract successive entries until the offset goes negative ! 1720: jl short found_psp ! 1721: add ecx, 4 ; advance to the next entry ! 1722: jmp short find_psp ! 1723: ! 1724: found_psp: ! 1725: mov strip.ST_psp, ecx ! 1726: neg edx ; edx = distance left in the entry at ecx ! 1727: mov strip.ST_spRemaining, edx ! 1728: ! 1729: sub ecx, strip.ST_pspStart ; ecx = byte offset of that entry from pspStart ! 1730: test ecx, 4 ; size STYLEPOS ! 1731: jz short done_arbitrary ! 1732: not edi ; entry index is odd: flip bIsGap ! 1733: ! 1734: done_arbitrary: ! 1735: mov dword ptr strip.ST_bIsGap, edi ! 1736: mov eax, ptlStart.ptl_y ; restore eax saved above ! 1737: mov ecx, ptlStart.ptl_x ; restore ecx saved on entry ! 1738: jmp done_styling ! 1739: ! 1740: ;---------------------------Private-Routine-----------------------------; ! 1741: ; do_some_clipping ! 1742: ; ! 1743: ; Inputs: ! 1744: ; eax = garbage ! 1745: ; ebx = fl ! 1746: ; ecx = x0 ! 1747: ; edx = garbage ! 1748: ; esi = x1 ! 1749: ; edi = garbage ! 1750: ; ! 1751: ; Decides whether to do simple or complex clipping, based on FL_COMPLEX_CLIP in fl. ! 1752: ; ! 1753: ;-----------------------------------------------------------------------; ! 1754: ! 1755: align 4 ! 1756: ! 
1757: public do_some_clipping ! 1758: do_some_clipping: ! 1759: testb ebx, FL_COMPLEX_CLIP ! 1760: jnz initialize_complex_clipping ! 1761: ! 1762: ;-----------------------------------------------------------------------; ! 1763: ; simple_clipping ! 1764: ; ! 1765: ; Inputs: ! 1766: ; ebx = fl ! 1767: ; ecx = x0 ! 1768: ; esi = x1 ! 1769: ; Output: ! 1770: ; ebx = fl ! 1771: ; ecx = new x0 (stack variable updated too) ! 1772: ; esi = new x1 ! 1773: ; y0 stack variable updated ! 1774: ; Uses: ! 1775: ; All registers ! 1776: ; Exits: ! 1777: ; to done_clipping, or to next_line (via totally_clipped) when nothing of the line is left ! 1778: ; ! 1779: ; This routine handles clipping the line to the clip rectangle (it's ! 1780: ; faster to handle this case in the driver than to call the engine to ! 1781: ; clip for us). ! 1782: ; ! 1783: ; Fractional end-point lines complicate our lives a bit when doing ! 1784: ; clipping: ! 1785: ; ! 1786: ; 1) For styling, we must know the unclipped line's length in pels, so ! 1787: ; that we can correctly update the styling state when the line is ! 1788: ; clipped. For this reason, I do clipping after doing the hard work ! 1789: ; of figuring out which pixels are at the ends of the line (this is ! 1790: ; wasted work if the line is not styled and is completely clipped, ! 1791: ; but I think it's simpler this way). Another reason is that we'll ! 1792: ; have calculated eqGamma already, which we use for the intercept ! 1793: ; calculations. ! 1794: ; ! 1795: ; With the assumption that most lines will not be completely clipped ! 1796: ; away, this strategy isn't too painful. ! 1797: ; ! 1798: ; 2) x0, y0 are not necessarily zero, where (x0, y0) is the start pel of ! 1799: ; the line. ! 1800: ; ! 1801: ; 3) We know x0, y0 and x1, but not y1. We haven't needed to calculate ! 1802: ; y1 until now. We'll need the actual value, and not an upper bound ! 1803: ; like y1 = LFLOOR(dM) + 2 because we have to be careful when ! 1804: ; calculating x(y) that y0 <= y <= y1, otherwise we can cause an ! 
1805: ; overflow on the divide (which, needless to say, is bad). ! 1806: ; ! 1807: ;-----------------------------------------------------------------------; ! 1808: ! 1809: public simple_clipping ! 1810: simple_clipping: ! 1811: mov edi, prclClip ; get pointer to normalized clip rect ! 1812: and ebx, FL_RECTLCLIP_MASK ; (it's lower-right exclusive) ! 1813: ! 1814: .errnz (FL_RECTLCLIP_SHIFT - 2); ((ebx AND FL_RECTLCLIP_MASK) shr ! 1815: .errnz (size RECTL) - 16 ; FL_RECTLCLIP_SHIFT) is our index ! 1816: lea edi, [edi + ebx*4] ; into the array of rectangles ! 1817: ! 1818: mov edx, [edi].xRight ; load the rect coordinates ! 1819: mov eax, [edi].xLeft ! 1820: mov ebx, [edi].yBottom ; NOTE: ebx no longer holds fl; it is reloaded from fl before leaving ! 1821: mov edi, [edi].yTop ! 1822: ! 1823: ; Translate to our origin and do some quick completely clipped tests: ! 1824: ! 1825: sub edx, x ; edx = xRight - x ! 1826: cmp ecx, edx ! 1827: jge totally_clipped ; totally clipped if x0 >= xRight ! 1828: ! 1829: sub eax, x ; eax = xLeft - x ! 1830: cmp esi, eax ! 1831: jl totally_clipped ; totally clipped if x1 < xLeft ! 1832: ! 1833: sub ebx, y ; ebx = yBottom - y ! 1834: cmp y0, ebx ! 1835: jge totally_clipped ; totally clipped if y0 >= yBottom ! 1836: ! 1837: sub edi, y ; edi = yTop - y ! 1838: ! 1839: ; Save some state: ! 1840: ! 1841: mov xClipRight, edx ; saved because the mul/div sequences below overwrite edx and eax ! 1842: mov xClipLeft, eax ! 1843: ! 1844: cmp esi, edx ; if (x1 >= xRight) x1 = xRight - 1 ! 1845: jl short calculate_y1 ! 1846: lea esi, [edx - 1] ! 1847: ! 1848: calculate_y1: ! 1849: mov eax, esi ; y1 = (x1 * dN + eqGamma) / dM ! 1850: mul dN ; edx:eax = x1 * dN ! 1851: add eax, eqGamma_lo ! 1852: adc edx, eqGamma_hi ; edx:eax += eqGamma (64-bit add) ! 1853: div dM ; eax = y1 ! 1854: ! 1855: cmp edi, eax ; if (yTop > y1) clipped ! 1856: jg short totally_clipped ! 1857: ! 1858: cmp ebx, eax ; if (yBottom > y1) know x1 ! 1859: jg short x1_computed ! 1860: ! 1861: mov eax, ebx ; x1 = (yBottom * dM + eqBeta) / dN ! 1862: mul dM ; edx:eax = yBottom * dM ! 1863: stc ; carry set so the sbb pair subtracts eqGamma + 1 ! 1864: sbb eax, eqGamma_lo ! 1865: sbb edx, eqGamma_hi ; edx:eax = yBottom * dM - eqGamma - 1 ! 1866: div dN ! 1867: mov esi, eax ; esi = new x1 ! 1868: ! 1869: ; At this point, we've taken care of calculating the intercepts with the ! 
1870: ; right and bottom edges. Now we work on the left and top edges: ! 1871: ! 1872: x1_computed: ! 1873: mov edx, y0 ; edx = current y0 (replaced below if the left edge clips) ! 1874: ! 1875: mov eax, xClipLeft ; don't have to compute y intercept ! 1876: cmp eax, ecx ; at left edge if line starts to ! 1877: jle short top_intercept ; right of left edge ! 1878: ! 1879: mov ecx, eax ; x0 = xLeft ! 1880: mul dN ; y0 = (xLeft * dN + eqGamma) / dM ! 1881: add eax, eqGamma_lo ! 1882: adc edx, eqGamma_hi ! 1883: div dM ; eax = y0 at the left edge ! 1884: ! 1885: cmp ebx, eax ; if (yBottom <= y0) clipped ! 1886: jle short totally_clipped ! 1887: ! 1888: mov edx, eax ; edx = new y0 for the top-edge test ! 1889: mov y0, eax ! 1890: ! 1891: top_intercept: ! 1892: mov ebx, fl ; get ready to leave ! 1893: mov x0, ecx ! 1894: ! 1895: cmp edi, edx ; if (yTop <= y0) done clipping ! 1896: jle done_clipping ! 1897: ! 1898: mov eax, edi ; x0 = (yTop * dM + eqBeta) / dN + 1 ! 1899: mul dM ; edx:eax = yTop * dM ! 1900: stc ; carry set so the sbb pair subtracts eqGamma + 1 ! 1901: sbb eax, eqGamma_lo ! 1902: sbb edx, eqGamma_hi ; edx:eax = yTop * dM - eqGamma - 1 ! 1903: div dN ! 1904: lea ecx, [eax + 1] ; ecx = quotient + 1 = new x0 ! 1905: ! 1906: cmp xClipRight, ecx ; if (xRight <= x0) clipped ! 1907: jle short totally_clipped ! 1908: ! 1909: mov y0, edi ; y0 = yTop ! 1910: mov x0, ecx ! 1911: jmp done_clipping ; all done! ! 1912: ! 1913: totally_clipped: ! 1914: ! 1915: ; The line is completely clipped. See if we have to update our style state: ! 1916: ! 1917: mov ebx, fl ; reload fl (ebx may hold yBottom at this point) ! 1918: testb ebx, FL_STYLED ! 1919: jz next_line ; not styled: nothing to update ! 1920: ! 1921: ; Adjust our style state: ! 1922: ! 1923: mov esi, pls ! 1924: mov eax, [esi].LS_spNext ! 1925: add eax, cStylePels ; advance the style position by the unclipped pel count ! 1926: mov [esi].LS_spNext, eax ! 1927: ! 1928: cmp eax, [esi].LS_spTotal2 ! 1929: jb next_line ; still below spTotal2: stored value stands ! 1930: ! 1931: ; Have to normalize first: ! 1932: ! 1933: xor edx, edx ; zero the high half of the dividend ! 1934: div [esi].LS_spTotal2 ! 1935: mov [esi].LS_spNext, edx ; spNext = spNext mod spTotal2 ! 1936: ! 1937: jmp next_line ! 1938: ! 1939: ;-----------------------------------------------------------------------; ! 1940: ! 1941: initialize_complex_clipping: ! 1942: mov eax, dN ; save a copy of original dN ! 1943: mov dN_Original, eax ; execution then continues into continue_complex_clipping ! 1944: ! 
1945: ;---------------------------Private-Routine-----------------------------; ! 1946: ; continue_complex_clipping ! 1947: ; ! 1948: ; Inputs: ! 1949: ; ebx = fl (the prun, xStart, cStylePels and dN_Original variables are read as well) ! 1950: ; Output: ! 1951: ; ebx = fl ! 1952: ; ecx = x0 ! 1953: ; esi = x1 (the x0, y0, dN and prun variables and pls->spNext are rewritten too) ! 1954: ; Uses: ! 1955: ; All registers. ! 1956: ; Exits: ! 1957: ; to done_clipping ! 1958: ; ! 1959: ; This routine handles the necessary initialization for the next ! 1960: ; run in the CLIPLINE structure. ! 1961: ; ! 1962: ; NOTE: This routine is entered from two places: by falling through from initialize_complex_clipping and by a jump from see_if_done_complex_clipping! ! 1963: ;-----------------------------------------------------------------------; ! 1964: ! 1965: public continue_complex_clipping ! 1966: continue_complex_clipping: ! 1967: mov edi, prun ; edi = pointer to the current RUN ! 1968: mov ecx, xStart ! 1969: testb ebx, FL_FLIP_H ! 1970: jz short complex_left_to_right ! 1971: ! 1972: complex_right_to_left: ! 1973: ! 1974: ; Figure out x0 and x1 for right-to-left lines: ! 1975: ! 1976: add ecx, cStylePels ! 1977: dec ecx ! 1978: mov esi, ecx ; esi = ecx = xStart + cStylePels - 1 ! 1979: sub ecx, [edi].RUN_iStop ; New x0 ! 1980: sub esi, [edi].RUN_iStart ; New x1 ! 1981: jmp short complex_reset_variables ! 1982: ! 1983: complex_left_to_right: ! 1984: ! 1985: ; Figure out x0 and x1 for left-to-right lines: ! 1986: ! 1987: mov esi, ecx ; esi = ecx = xStart ! 1988: add ecx, [edi].RUN_iStart ; New x0 ! 1989: add esi, [edi].RUN_iStop ; New x1 ! 1990: ! 1991: complex_reset_variables: ! 1992: mov x0, ecx ; store the new x0 ! 1993: ! 1994: ; The half flip mucks with some of our variables, and we have to reset ! 1995: ; them every pass. We would have to reset eqGamma too, but it never ! 1996: ; got saved to memory in its modified form. ! 1997: ! 1998: add edi, size RUN ! 1999: mov prun, edi ; Increment run pointer for next time ! 2000: ! 2001: mov edi, pls ! 2002: mov eax, [edi].LS_spComplex ! 2003: mov [edi].LS_spNext, eax ; pls->spNext = pls->spComplex ! 2004: ! 2005: mov eax, dN_Original ; dN = dN_Original ! 2006: mov dN, eax ! 2007: ! 2008: mul ecx ; edx:eax = dN * x0 ! 
2009: add eax, eqGamma_lo ! 2010: adc edx, eqGamma_hi ; [edx:eax] = dN*x0 + eqGamma ! 2011: ! 2012: div dM ; eax = (dN*x0 + eqGamma) / dM, unsigned ! 2013: mov y0, eax ; y0 recomputed for the new x0 ! 2014: jmp done_clipping ! 2015: ! 2016: endProc bLines ! 2017: ! 2018: end
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.