|
|
1.1 ! root 1: ;---------------------------Module-Header------------------------------; ! 2: ; Module Name: lines.asm ! 3: ; ! 4: ; Draws a set of connected polylines. Initialization for the device ! 5: ; or bitmap has already been done in the stroke routine. Solid and ! 6: ; styled lines are handled for both the device and bitmaps. Banking ! 7: ; for the display is handled. ! 8: ; ! 9: ; The code is different depending on whether we are drawing solid ! 10: ; lines, styled lines with common styles, or lines with completely ! 11: ; arbitrary styles. ! 12: ; ! 13: ; There are sixteen raster operations (sets of logical operations) ! 14: ; performed on the data written out. When writing to the VGA there are ! 15: ; four of these operations which take two passes of VGA memory. In ! 16: ; each of these cases the first pass inverts the necessary bits in the ! 17: ; necessary planes. The second pass then performs the rest of the ! 18: ; raster operation. The other twelve raster operations can be done in ! 19: ; one pass of VGA memory. All raster operations are done in one pass of ! 20: ; memory for bitmaps. Depending on the raster operation and the color ! 21: ; of the pen, it is easily determined whether we set bits to zeros, set ! 22: ; bits to ones, invert bits or do nothing. Bitmaps are written to one ! 23: ; plane at a time. ! 24: ; ! 25: ; Lines are drawn from left to right. So if a line moves from right ! 26: ; to left, the endpoints are swapped and the line is drawn from left to ! 27: ; right. ! 28: ; ! 29: ; Copyright (c) 1992 Microsoft Corporation ! 30: ;-----------------------------------------------------------------------; ! 31: ! 32: .386 ! 33: ! 34: .model small,c ! 35: ! 36: assume cs:FLAT,ds:FLAT,es:FLAT,ss:FLAT ! 37: assume fs:nothing,gs:nothing ! 38: ! 39: .xlist ! 40: include stdcall.inc ;calling convention cmacros ! 41: include i386\egavga.inc ! 42: include i386\strucs.inc ! 43: include i386\lines.inc ! 44: .list ! 45: ! 46: .data ! 47: ! 
;--------------------------------Data-----------------------------------;
; gaflRoundTable
;
; Rounding flags that get OR'ed into a line's flags once its flips are
; known.  bLines masks its flags with FL_ROUND_MASK and uses the result
; directly as a byte offset into this table, so the order of the
; entries must not change.
;-----------------------------------------------------------------------;

        public  gaflRoundTable
gaflRoundTable  label   dword
        dd      FL_H_ROUND_DOWN + FL_V_ROUND_DOWN   ; no flips
        dd      FL_H_ROUND_DOWN + FL_V_ROUND_DOWN   ; D flip
        dd      FL_H_ROUND_DOWN                     ; V flip
        dd      FL_V_ROUND_DOWN                     ; D & V flip
        dd      FL_V_ROUND_DOWN                     ; slope one
        dd      0baadf00dh                          ; filler (presumably slope one & D
                                                    ;  flip, which the flip code in
                                                    ;  bLines never produces)
        dd      FL_H_ROUND_DOWN                     ; slope one & V flip
        dd      0baadf00dh                          ; filler (see above)

        .code

_TEXT$03        SEGMENT DWORD USE32 PUBLIC 'CODE'
        ASSUME  CS:FLAT, DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING

;--------------------------------Macro----------------------------------;
; TESTB targ, <mask>
;
; Emits the shortest TEST instruction that checks <mask> against targ:
;
;   - mask has a bit in the high word, or bits in both of the low two
;     bytes:                          test targ,mask
;   - mask lies entirely in bits 8-15:  test bh,(mask SHR 8)
;   - anything else (bits 0-7 only):    test bl,mask
;
; Using a byte test instead of a dword test saves 3 bytes of code space.
; The byte forms are only generated when targ is EBX; any other target
; that would need one is an assembly error.  A third macro argument is
; also an error (it means the mask was not enclosed in brackets).
;-----------------------------------------------------------------------;

TESTB   macro   targ,mask,thirdarg
        local   bytemask,hibyte,wholedword

        ifnb <thirdarg>
        .err TESTB mask must be enclosed in brackets!
        endif

; First classify the mask...

        bytemask   = mask
        hibyte     = 0                  ; 1 when the mask lives in bits 8-15
        wholedword = 0                  ; 1 when no byte form is possible

        if bytemask AND 0ffff0000h
          wholedword = 1                ; bit set in hi-word
        else
          if bytemask AND 0ff00h
            if bytemask AND 0ffh
              wholedword = 1            ; bits set in lo-byte and hi-byte
            else
              bytemask = bytemask SHR 8
              hibyte   = 1
            endif
          endif
        endif

; ...then emit the matching instruction:

        if wholedword
          test  targ,mask
          exitm
        endif

        ifidni <targ>,<EBX>
          if hibyte
            test bh,bytemask
          else
            test bl,bytemask
          endif
          exitm
        endif

        .err Too bad TESTB doesn't support targets other than ebx!
        endm

;---------------------------Public-Routine------------------------------;
; bLines(pdsurf, pptfxFirst, pptfxBuf, prun, cptfx, pls,
;        prclClip, apfn[], flStart)
;
; Do all the DDA calculations for lines.
;
; Doing Lines Right
; -----------------
;
118: ; In NT, all lines are given to the device driver in fractional ! 119: ; coordinates, in a 28.4 fixed point format. The lower 4 bits are ! 120: ; fractional for sub-pixel positioning. ! 121: ; ! 122: ; Note that you CANNOT! just round the coordinates to integers ! 123: ; and pass the results to your favorite integer Bresenham routine!! ! 124: ; (Unless, of course, you have such a high resolution device that ! 125: ; nobody will notice -- not likely for a display device.) The ! 126: ; fractions give a more accurate rendering of the line -- this is ! 127: ; important for things like our Bezier curves, which would have 'kinks' ! 128: ; if the points in its polyline approximation were rounded to integers. ! 129: ; ! 130: ; Unfortunately, for fractional lines there is more setup work to do ! 131: ; a DDA than for integer lines. However, the main loop is exactly ! 132: ; the same (and can be done entirely with 32 bit math). ! 133: ; ! 134: ; If You've Got Hardware That Does Bresenham ! 135: ; ------------------------------------------ ! 136: ; ! 137: ; A lot of hardware limits DDA error terms to 'n' bits. With fractional ! 138: ; coordinates, 4 bits are given to the fractional part, letting ! 139: ; you draw in hardware only those lines that lie entirely in a 2^(n-4) ! 140: ; by 2^(n-4) pixel space. ! 141: ; ! 142: ; And you still have to correctly draw those lines with coordinates ! 143: ; outside that space! Remember that the screen is only a viewport ! 144: ; onto a 28.4 by 28.4 space -- if any part of the line is visible ! 145: ; you MUST render it precisely, regardless of where the end points lie. ! 146: ; So even if you do it in software, somewhere you'll have to have a ! 147: ; 32 bit DDA routine. ! 148: ; ! 149: ; Our Implementation ! 150: ; ------------------ ! 151: ; ! 152: ; We employ a run length slice algorithm: our DDA calculates the ! 153: ; number of pixels that are in each row (or 'strip') of pixels. ! 154: ; ! 
155: ; We've separated the running of the DDA and the drawing of pixels: ! 156: ; we run the DDA for several iterations and store the results in ! 157: ; a 'strip' buffer (which are the lengths of consecutive pixel rows of ! 158: ; the line), then we crank up a 'strip drawer' that will draw all the ! 159: ; strips in the buffer. ! 160: ; ! 161: ; We also employ a 'half-flip' to reduce the number of strip ! 162: ; iterations we need to do in the DDA and strip drawing loops: when a ! 163: ; (normalized) line's slope is more than 1/2, we do a final flip ! 164: ; about the line y = (1/2)x. So now, instead of each strip being ! 165: ; consecutive horizontal or vertical pixel rows, each strip is composed ! 166: ; of those pixels aligned in 45 degree rows. So a line like (0, 0) to ! 167: ; (128, 128) would generate only one strip. ! 168: ; ! 169: ; We also always draw only left-to-right. ! 170: ; ! 171: ; Style lines may have arbitrary style patterns. We specially ! 172: ; optimize the default patterns (and call them 'masked' styles). ! 173: ; ! 174: ; The DDA Derivation ! 175: ; ------------------ ! 176: ; ! 177: ; Here is how I like to think of the DDA calculation. ! 178: ; ! 179: ; We employ Knuth's "diamond rule": rendering a one-pixel-wide line ! 180: ; can be thought of as dragging a one-pixel-wide by one-pixel-high ! 181: ; diamond along the true line. Pixel centers lie on the integer ! 182: ; coordinates, and so we light any pixel whose center gets covered ! 183: ; by the "drag" region (John D. Hobby, Journal of the Association ! 184: ; for Computing Machinery, Vol. 36, No. 2, April 1989, pp. 209-229). ! 185: ; ! 186: ; We must define which pixel gets lit when the true line falls ! 187: ; exactly half-way between two pixels. In this case, we follow ! 188: ; the rule: when two pels are equidistant, the upper or left pel ! 189: ; is illuminated, unless the slope is exactly one, in which case ! 190: ; the upper or right pel is illuminated. (So we make the edges ! 
191: ; of the diamond exclusive, except for the top and left vertices, ! 192: ; which are inclusive, unless we have slope one.) ! 193: ; ! 194: ; This metric decides what pixels should be on any line BEFORE it is ! 195: ; flipped around for our calculation. Having a consistent metric ! 196: ; this way will let our lines blend nicely with our curves. The ! 197: ; metric also dictates that we will never have one pixel turned on ! 198: ; directly above another that's turned on. We will also never have ! 199: ; a gap; i.e., there will be exactly one pixel turned on for each ! 200: ; column between the start and end points. All that remains to be ! 201: ; done is to decide how many pixels should be turned on for each row. ! 202: ; ! 203: ; So lines we draw will consist of varying numbers of pixels on ! 204: ; successive rows, for example: ! 205: ; ! 206: ; ****** ! 207: ; ***** ! 208: ; ****** ! 209: ; ***** ! 210: ; ! 211: ; We'll call each set of pixels on a row a "strip". ! 212: ; ! 213: ; (Please remember that our coordinate space has the origin as the ! 214: ; upper left pixel on the screen; positive y is down and positive x ! 215: ; is right.) ! 216: ; ! 217: ; Device coordinates are specified as fixed point 28.4 numbers, ! 218: ; where the first 28 bits are the integer coordinate, and the last ! 219: ; 4 bits are the fraction. So coordinates may be thought of as ! 220: ; having the form (x, y) = (M/F, N/F) where F is the constant scaling ! 221: ; factor F = 2^4 = 16, and M and N are 32 bit integers. ! 222: ; ! 223: ; Consider the line from (M0/F, N0/F) to (M1/F, N1/F) which runs ! 224: ; left-to-right and whose slope is in the first octant, and let ! 225: ; dM = M1 - M0 and dN = N1 - N0. Then dM >= 0, dN >= 0 and dM >= dN. ! 226: ; ! 227: ; Since the slope of the line is less than 1, the edges of the ! 228: ; drag region are created by the top and bottom vertices of the ! 229: ; diamond. At any given pixel row y of the line, we light those !
230: ; pixels whose centers are between the left and right edges. ! 231: ; ! 232: ; Let mL(n) denote the line representing the left edge of the drag ! 233: ; region. On pixel row j, the column of the first pixel to be ! 234: ; lit is ! 235: ; ! 236: ; iL(j) = ceiling( mL(j * F) / F) ! 237: ; ! 238: ; Since the line's slope is less than one: ! 239: ; ! 240: ; iL(j) = ceiling( mL([j + 1/2] F) / F ) ! 241: ; ! 242: ; Recall the formula for our line: ! 243: ; ! 244: ; n(m) = (dN / dM) (m - M0) + N0 ! 245: ; ! 246: ; m(n) = (dM / dN) (n - N0) + M0 ! 247: ; ! 248: ; Since the line's slope is less than one, the line representing ! 249: ; the left edge of the drag region is the original line offset ! 250: ; by 1/2 pixel in the y direction: ! 251: ; ! 252: ; mL(n) = (dM / dN) (n - F/2 - N0) + M0 ! 253: ; ! 254: ; From this we can figure out the column of the first pixel that ! 255: ; will be lit on row j, being careful of rounding (if the left ! 256: ; edge lands exactly on an integer point, the pixel at that ! 257: ; point is not lit because of our rounding convention): ! 258: ; ! 259: ; iL(j) = floor( mL(j F) / F ) + 1 ! 260: ; ! 261: ; = floor( ((dM / dN) (j F - F/2 - N0) + M0) / F ) + 1 ! 262: ; ! 263: ; = floor( (F dM j - F/2 dM - N0 dM + dN M0) / (F dN) ) + 1 ! 264: ; ! 265: ; ! 266: ; = floor( (F dM j - [ dM (N0 + F/2) - dN M0 ]) / (F dN) ) + 1 ! 267: ; ! 268: ; ! 269: ; ! 270: ; = floor( (dM j - [ dM (N0 + F/2) - dN M0 ] / F) / dN ) + 1 (1) ! 271: ; ! 272: ; ! 273: ; = floor( (dM j + alpha) / dN ) + 1 ! 274: ; ! 275: ; where ! 276: ; ! 277: ; alpha = - [ dM (N0 + F/2) - dN M0 ] / F ! 278: ; ! 279: ; We use equation (1) to calculate the DDA: there are iL(j+1) - iL(j) ! 280: ; pixels in row j. Because we are always calculating iL(j) for ! 281: ; integer quantities of j, we note that the only fractional term ! 282: ; is constant, and so we can 'throw away' the fractional bits of ! 283: ; alpha: ! 284: ; !
285: ; beta = floor( - [ dM (N0 + F/2) - dN M0 ] / F ) (2) ! 286: ; ! 287: ; so ! 288: ; ! 289: ; iL(j) = floor( (dM j + beta) / dN ) + 1 (3) ! 290: ; ! 291: ; for integers j. ! 292: ; ! 293: ; Note if iR(j) is the line's rightmost pixel on row j, that ! 294: ; iR(j) = iL(j + 1) - 1. ! 295: ; ! 296: ; Similarly, rewriting equation (1) as a function of column i, ! 297: ; we can determine, given column i, on which pixel row j is the line ! 298: ; lit: ! 299: ; ! 300: ; ! 301: ; j(i) = ceiling( (dN i + [ dM (N0 + F/2) - dN M0 ] / F) / dM ) - 1 ! 302: ; ! 303: ; ! 304: ; Floors are easier to compute, so we can rewrite this: ! 305: ; ! 306: ; ! 307: ; j(i) = floor( (dN i + [ dM (N0 + F/2) - dN M0 ] / F + dM - 1/F) / dM ) - 1 ! 308: ; ! 309: ; ! 310: ; ! 311: ; = floor( (dN i + [ dM (N0 + F/2) - dN M0 ] / F + dM - 1/F - dM) / dM ) ! 312: ; ! 313: ; ! 314: ; ! 315: ; = floor( (dN i + [ dM (N0 + F/2) - dN M0 - 1 ] / F) / dM ) ! 316: ; ! 317: ; ! 318: ; We can once again wave our hands and throw away the fractional bits ! 319: ; of the remainder term: ! 320: ; ! 321: ; j(i) = floor( (dN i + gamma) / dM ) (4) ! 322: ; ! 323: ; where ! 324: ; ! 325: ; gamma = floor( [ dM (N0 + F/2) - dN M0 - 1 ] / F ) (5) ! 326: ; ! 327: ; We now note that ! 328: ; ! 329: ; beta = -gamma - 1 = ~gamma (6) ! 330: ; ! 331: ; To draw the pixels of the line, we could evaluate (3) on every scan ! 332: ; line to determine where the strip starts. Of course, we don't want ! 333: ; to do that because that would involve a multiply and divide for every ! 334: ; scan. So we do everything incrementally. ! 335: ; ! 336: ; We would like to easily compute c_j, the number of pixels on scan j: ! 337: ; ! 338: ; ! 339: ; c_j = iL(j + 1) - iL(j) ! 340: ; ! 341: ; ! 342: ; = floor((dM (j + 1) + beta) / dN) - floor((dM j + beta) / dN) (7) ! 343: ; !
344: ; This may be rewritten as ! 345: ; ! 346: ; c_j = floor(i_{j+1} + r_{j+1} / dN) - floor(i_j + r_j / dN) (8) ! 347: ; ! 348: ; ! 349: ; where i_j, i_{j+1} are integers and r_j < dN, r_{j+1} < dN. ! 350: ; ! 351: ; ! 352: ; Rewriting (7) again: ! 353: ; ! 354: ; c_j = floor(i_j + r_j / dN + dM / dN) - floor(i_j + r_j / dN) ! 355: ; ! 356: ; ! 357: ; ! 358: ; = floor((r_j + dM) / dN) - floor(r_j / dN) ! 359: ; ! 360: ; ! 361: ; This may be rewritten as ! 362: ; ! 363: ; c_j = dI + floor((r_j + dR) / dN) - floor(r_j / dN) ! 364: ; ! 365: ; ! 366: ; where dI + dR / dN = dM / dN, dI is an integer and dR < dN. ! 367: ; ! 368: ; r_j is the remainder (or "error") term in the DDA loop: r_j / dN ! 369: ; ! 370: ; is the exact fraction of a pixel at which the strip ends. To go ! 371: ; on to the next scan and compute c_{j+1} we need to know r_{j+1}. ! 372: ; ! 373: ; ! 374: ; So in the main loop of the DDA: ! 375: ; ! 376: ; c_j = dI + floor((r_j + dR) / dN) and r_{j+1} = (r_j + dR) % dN ! 377: ; ! 378: ; ! 379: ; and we know r_j < dN, r_{j+1} < dN, and dR < dN. ! 380: ; ! 381: ; ! 382: ; We have derived the DDA only for lines in the first octant; to ! 383: ; handle other octants we do the common trick of flipping the line ! 384: ; to the first octant by first making the line left-to-right by ! 385: ; exchanging the end-points, then flipping about the lines y = 0 and ! 386: ; y = x, as necessary. We must record the transformations so we can ! 387: ; undo them later. ! 388: ; ! 389: ; We must also be careful of how the flips affect our rounding. If ! 390: ; to get the line to the first octant we flipped about y = 0, we now ! 391: ; have to be careful to round a y value of 1/2 up instead of down as ! 392: ; we would for a line originally in the first octant (recall that ! 393: ; "In the case where two pels are equidistant, the upper or left ! 394: ; pel is illuminated..."). ! 395: ; ! 396: ; To account for this rounding when running the DDA, we shift the line !
397: ; (or not) in the y direction by the smallest amount possible. That ! 398: ; takes care of rounding for the DDA, but we still have to be careful ! 399: ; about the rounding when determining the first and last pixels to be ! 400: ; lit in the line. ! 401: ; ! 402: ; Determining The First And Last Pixels In The Line ! 403: ; ------------------------------------------------- ! 404: ; ! 405: ; Fractional coordinates also make it harder to determine which pixels ! 406: ; will be the first and last ones in the line. We've already taken ! 407: ; the fractional coordinates into account in calculating the DDA, but ! 408: ; the DDA cannot tell us which are the end pixels because it is quite ! 409: ; happy to calculate pixels on the line from minus infinity to positive ! 410: ; infinity. ! 411: ; ! 412: ; The diamond rule determines the start and end pixels. (Recall that ! 413: ; the sides are exclusive except for the left and top vertices.) ! 414: ; This convention can be thought of in another way: there are diamonds ! 415: ; around the pixels, and wherever the true line crosses a diamond, ! 416: ; that pel is illuminated. ! 417: ; ! 418: ; Consider a line where we've done the flips to the first octant, and the ! 419: ; floor of the start coordinates is the origin: ! 420: ; ! 421: ; +-----------------------> +x ! 422: ; | ! 423: ; | 0 1 ! 424: ; | 0123456789abcdef ! 425: ; | ! 426: ; | 0 00000000?1111111 ! 427: ; | 1 00000000 1111111 ! 428: ; | 2 0000000 111111 ! 429: ; | 3 000000 11111 ! 430: ; | 4 00000 ** 1111 ! 431: ; | 5 0000 ****1 ! 432: ; | 6 000 1*** ! 433: ; | 7 00 1 **** ! 434: ; | 8 ? *** ! 435: ; | 9 22 3 **** ! 436: ; | a 222 33 *** ! 437: ; | b 2222 333 **** ! 438: ; | c 22222 3333 ** ! 439: ; | d 222222 33333 ! 440: ; | e 2222222 333333 ! 441: ; | f 22222222 3333333 ! 442: ; | ! 443: ; | 2 3 ! 444: ; v ! 445: ; +y ! 446: ; ! 447: ; If the start of the line lands on the diamond around pixel 0 (shown by ! 
448: ; the '0' region here), pixel 0 is the first pel in the line. The same ! 449: ; is true for the other pels. ! 450: ; ! 451: ; A little more work has to be done if the line starts in the ! 452: ; 'nether-land' between the diamonds (as illustrated by the '*' line): ! 453: ; the first pel lit is the first diamond crossed by the line (pixel 1 in ! 454: ; our example). This calculation is determined by the DDA or slope of ! 455: ; the line. ! 456: ; ! 457: ; If the line starts exactly half way between two adjacent pixels ! 458: ; (denoted here by the '?' spots), the first pixel is determined by our ! 459: ; round-down convention (and is dependent on the flips done to ! 460: ; normalize the line). ! 461: ; ! 462: ; Last Pel Exclusive ! 463: ; ------------------ ! 464: ; ! 465: ; To eliminate repeatedly lit pels between continuous connected lines, ! 466: ; we employ a last-pel exclusive convention: if the line ends exactly on ! 467: ; the diamond around a pel, that pel is not lit. (This eliminates the ! 468: ; checks we had in the old code to see if we were re-lighting pels.) ! 469: ; ! 470: ; The Half Flip ! 471: ; ------------- ! 472: ; ! 473: ; To make our run length algorithm more efficient, we employ a "half ! 474: ; flip". If after normalizing to the first octant, the slope is more ! 475: ; than 1/2, we subtract the y coordinate from the x coordinate. This ! 476: ; has the effect of reflecting the coordinates through the line of slope ! 477: ; 1/2. Note that the diagonal gets mapped into the x-axis after a half ! 478: ; flip. ! 479: ; ! 480: ; How Many Bits Do We Need, Anyway? ! 481: ; --------------------------------- ! 482: ; ! 483: ; Note that if the line is visible on your screen, you must light up ! 484: ; exactly the correct pixels, no matter where in the 28.4 x 28.4 device ! 485: ; space the end points of the line lie (meaning you must handle 32 bit ! 486: ; DDAs, you can certainly have optimized cases for lesser DDAs). ! 487: ; ! 
488: ; We move the origin to (floor(M0 / F), floor(N0 / F)), so when we ! 489: ; calculate gamma from (5), we know that 0 <= M0, N0 < F. And we ! 490: ; are in the first octant, so dM >= dN. Then we know that gamma can ! 491: ; be in the range [(-1/2)dM, (3/2)dM]. The DDI guarantees us that ! 492: ; valid lines will have dM and dN values at most 31 bits (unsigned) ! 493: ; of significance. So gamma requires 33 bits of significance (we store ! 494: ; this as a 64 bit number for convenience). ! 495: ; ! 496: ; When running through the DDA loop, r_j + dR can have a value in the ! 497: ; ! 498: ; range 0 <= r_j + dR < 2 dN; thus the result must be a 32 bit unsigned value. ! 499: ; ! 500: ; ! 501: ; Testing Lines ! 502: ; ------------- ! 503: ; ! 504: ; To be NT compliant, a display driver must exactly adhere to GIQ, ! 505: ; which means that for any given line, the driver must light exactly ! 506: ; the same pels as does GDI. This can be tested using the Guiman tool ! 507: ; provided elsewhere in the DDK, and 'ZTest', which draws random lines ! 508: ; on the screen and to a bitmap, and compares the results. ! 509: ; ! 510: ; If You've Got Line Hardware ! 511: ; --------------------------- ! 512: ; ! 513: ; If your hardware already adheres to GIQ, you're all set. Otherwise ! 514: ; you'll want to look at the S3 sample code and read the following: ! 515: ; ! 516: ; 1) You'll want to special case integer-only lines, since they require ! 517: ; less processing time and are more common (CAD programs will probably ! 518: ; only ever give integer lines). GDI does not provide a flag saying ! 519: ; that all lines in a path are integer lines; consequently, you will ! 520: ; have to explicitly check every line. ! 521: ; ! 522: ; 2) You are required to correctly draw any line in the 28.4 device ! 523: ; space that intersects the viewport. If you have less than 32 bits ! 524: ; of significance in the hardware for the Bresenham terms, extremely !
525: ; long lines would overflow the hardware. For such (rare) cases, you ! 526: ; can fall back to strip-drawing code, of which there is a C version in ! 527: ; the S3's lines.cxx (or if your display is a frame buffer, fall back ! 528: ; to the engine). ! 529: ; ! 530: ; 3) If you can explicitly set the Bresenham terms in your hardware, you ! 531: ; can draw non-integer lines using the hardware. If your hardware has ! 532: ; 'n' bits of precision, you can draw GIQ lines that are up to 2^(n-5) ! 533: ; pels long (4 bits are required for the fractional part, and one bit is ! 534: ; used as a sign bit). Note that integer lines don't require the 4 ! 535: ; fractional bits, so if you special case them as in 1), you can do ! 536: ; integer lines that are up to 2^(n - 1) pels long. See the S3's ! 537: ; fastline.asm for an example. ! 538: ; ! 539: ;-----------------------------------------------------------------------; ! 540: ! 541: cProc bLines,36,< \ ! 542: uses esi edi ebx, \ ! 543: pdsurf: ptr, \ ! 544: pptfxFirst: ptr, \ ! 545: pptfxBuf: ptr, \ ! 546: prun: ptr, \ ! 547: cptfx: dword, \ ! 548: pls: ptr, \ ! 549: prclClip: ptr, \ ! 550: apfn: ptr, \ ! 551: flStart: dword > ! 552: ! 553: ; pdsurf: Surface data ! 554: ; pptfxFirst: Start point of first line ! 555: ; pptfxBuf: All subsequent points ! 556: ; prun: Array of runs if doing complex clipping ! 557: ; cptfx: Number of points in pptfxBuf (i.e., # lines) ! 558: ; pls: Line state ! 559: ; prclClip: Clip rectangle if doing simple clipping ! 560: ; apfn: Pointer to table of strip drawers ! 561: ; flStart: Flags for all lines ! 562: ! 563: local cPelsAfterThisBank: dword ; For bank switching ! 564: local cStripsInNextRun: dword ; For bank switching ! 565: local pptfxBufEnd: ptr ; Last point in pptfxBuf ! 566: local M0: dword ; Normalized x0 in device coords ! 567: local dM: dword ; Delta-x in device coords ! 568: local N0: dword ; Normalized y0 in device coords ! 569: local dN: dword ; Delta-y in device coords ! 
570: local fl: dword ; Flags for current line ! 571: local x: dword ; Normalized start pixel x-coord ! 572: local y: dword ; Normalized start pixel y-coord ! 573: local eqGamma_lo: dword ; Lower 32 bits of Gamma ! 574: local eqGamma_hi: dword ; Upper 32 bits of Gamma ! 575: local x0: dword ; Start pixel x-offset ! 576: local y0: dword ; Start pixel y-offset ! 577: local ulSlopeOneAdjustment: dword ; Special offset if line of slope 1 ! 578: local cStylePels: dword ; # of pixels in line (before clip) ! 579: local xStart: dword ; Start pixel x-offset before clip ! 580: local pfn: ptr ; Pointer to strip drawing function ! 581: local cPels: dword ; # pixels to be drawn (after clip) ! 582: local i: dword ; # pixels in strip ! 583: local r: dword ; Remainder (or "error") term ! 584: local d_I: dword ; Delta-I ! 585: local d_R: dword ; Delta-R ! 586: local plStripEnd: ptr ; Last strip in buffer ! 587: local ptlStart[size POINTL]: byte ; Unnormalized start coord ! 588: local dN_Original: dword ; dN before half-flip ! 589: local xClipLeft: dword ; Left side of clip rectangle ! 590: local xClipRight: dword ; Right side of clip rectangle ! 591: local strip[size STRIPS]: byte ; Our strip buffer ! 592: ! 593: ; Do some initializing: ! 594: ! 595: mov ecx, cptfx ! 596: mov edx, pptfxBuf ! 597: lea eax, [edx + ecx * (size POINTL) - (size POINTL)] ! 598: mov pptfxBufEnd, eax ; pptfxBufEnd is inclusive of end point ! 599: ! 600: mov eax, [edx].ptl_x ; Load up end point (M1, N1) ! 601: mov edi, [edx].ptl_y ! 602: ! 603: mov edx, pptfxFirst ; Load up start point (M0, N0) ! 604: mov esi, [edx].ptl_x ! 605: mov ecx, [edx].ptl_y ! 606: ! 607: mov ebx, flStart ! 608: ! 609: ;-----------------------------------------------------------------------; ! 610: ; Flip to the first octant. ; ! 611: ;-----------------------------------------------------------------------; ! 612: ! 613: ; Register state: esi = M0 ! 614: ; ecx = N0 ! 615: ; eax = dM (M1) ! 616: ; edi = dN (N1) ! 617: ; ebx = fl !
618: ! 619: ; Make sure we go left to right: ! 620: ! 621: the_main_loop: ! 622: cmp esi, eax ! 623: jle short is_left_to_right ; skip if M0 <= M1 ! 624: xchg esi, eax ; swap M0, M1 ! 625: xchg ecx, edi ; swap N0, N1 ! 626: or ebx, FL_FLIP_H ! 627: ! 628: is_left_to_right: ! 629: ! 630: ; Compute the deltas, remembering that the DDI says we should get ! 631: ; deltas less than 2^31. If we get more, we ensure we don't crash ! 632: ; later on by simply skipping the line: ! 633: ! 634: sub eax, esi ; eax = dM ! 635: jo next_line ; dM must be less than 2^31 ! 636: sub edi, ecx ; edi = dN ! 637: jo next_line ; dN must be less than 2^31 ! 638: ! 639: jge short is_top_to_bottom ; skip if dN >= 0 ! 640: neg ecx ; N0 = -N0 ! 641: neg edi ; N1 = -N1 ! 642: or ebx, FL_FLIP_V ! 643: ! 644: is_top_to_bottom: ! 645: cmp edi, eax ! 646: jb short done_flips ; skip if dN < dM ! 647: jne short slope_more_than_one ! 648: ! 649: ; We must special case slopes of one (because of our rounding convention): ! 650: ! 651: or ebx, FL_FLIP_SLOPE_ONE ! 652: jmp short done_flips ! 653: ! 654: slope_more_than_one: ! 655: xchg eax, edi ; swap dM, dN ! 656: xchg esi, ecx ; swap M0, N0 ! 657: or ebx, FL_FLIP_D ! 658: ! 659: done_flips: ! 660: ! 661: mov edx, ebx ! 662: and edx, FL_ROUND_MASK ! 663: .errnz FL_ROUND_SHIFT - 2 ! 664: or ebx, [gaflRoundTable + edx] ; get our rounding flags ! 665: ! 666: mov dM, eax ; save some info ! 667: mov dN, edi ! 668: mov fl, ebx ! 669: ! 670: ; We're going to shift our origin so that it's at the closest integer ! 671: ; coordinate to the left/above our fractional start point (it makes ! 672: ; the math quicker): ! 673: ! 674: mov edx, esi ; x = LFLOOR(M0) ! 675: sar edx, FLOG2 ! 676: mov x, edx ! 677: ! 678: mov edx, ecx ; y = LFLOOR(N0) ! 679: sar edx, FLOG2 ! 680: mov y, edx ! 681: ! 682: ;-----------------------------------------------------------------------; ! 683: ; Compute the fractional remainder term ; ! 
684: ;-----------------------------------------------------------------------; ! 685: ! 686: ; By shifting the origin we've contrived to eliminate the integer ! 687: ; portion of our fractional start point, giving us start point ! 688: ; fractional coordinates in the range [0, F - 1]: ! 689: ! 690: and esi, F - 1 ; M0 = FXFRAC(M0) ! 691: and ecx, F - 1 ; N0 = FXFRAC(N0) ! 692: ! 693: ; We now compute Gamma: ! 694: ! 695: mov M0, esi ; save M0, N0 for later ! 696: mov N0, ecx ! 697: ! 698: lea edx, [ecx + F/2] ! 699: mul edx ; [edx:eax] = dM * (N0 + F/2) ! 700: xchg eax, edi ! 701: mov ecx, edx ; [ecx:edi] = dM * (N0 + F/2) ! 702: ; (we just nuked N0) ! 703: ! 704: mul esi ; [edx:eax] = dN * M0 ! 705: ! 706: ; Now gamma = dM * (N0 + F/2) - dN * M0 - bRoundDown ! 707: ! 708: .errnz FL_V_ROUND_DOWN - 8000h ! 709: ror bh, 8 ! 710: sbb edi, eax ! 711: sbb ecx, edx ! 712: ! 713: shrd edi, ecx, FLOG2 ! 714: sar ecx, FLOG2 ; gamma = [ecx:edi] >>= 4 ! 715: ! 716: mov eqGamma_hi, ecx ! 717: mov eqGamma_lo, edi ! 718: ! 719: mov eax, N0 ! 720: ! 721: ; Register state: ! 722: ; eax = N0 ! 723: ; ebx = fl ! 724: ; ecx = eqGamma_hi ! 725: ; edx = garbage ! 726: ; esi = M0 ! 727: ; edi = eqGamma_lo ! 728: ! 729: testb ebx, FL_FLIP_H ! 730: jnz line_runs_right_to_left ! 731: ! 732: ;-----------------------------------------------------------------------; ! 733: ; Figure out which pixels are at the ends of a left-to-right line. ; ! 734: ; --------> ; ! 735: ;-----------------------------------------------------------------------; ! 736: ! 737: public line_runs_left_to_right ! 738: line_runs_left_to_right: ! 739: or esi, esi ! 740: jz short LtoR_check_slope_one ! 741: ; skip ahead if M0 == 0 ! 742: ; (in that case, x0 = 0 which is to be ! 743: ; kept in esi, and is already ! 744: ; conventiently zero) ! 745: ! 746: or eax, eax ! 747: jnz short LtoR_N0_not_zero ! 748: ! 749: .errnz FL_H_ROUND_DOWN - 80h ! 750: ror bl, 8 ! 751: sbb esi, -F/2 ! 752: shr esi, FLOG2 ! 
753: jmp short LtoR_check_slope_one ! 754: ; esi = x0 = rounded M0 ! 755: ! 756: LtoR_N0_not_zero: ! 757: sub eax, F/2 ! 758: sbb edx, edx ! 759: xor eax, edx ! 760: sub eax, edx ! 761: cmp esi, eax ! 762: sbb esi, esi ! 763: inc esi ; esi = x0 = (abs(N0 - F/2) <= M0) ! 764: ! 765: public LtoR_check_slope_one ! 766: LtoR_check_slope_one: ! 767: mov ulSlopeOneAdjustment, 0 ! 768: mov eax, ebx ! 769: and eax, FL_FLIP_SLOPE_ONE + FL_H_ROUND_DOWN ! 770: cmp eax, FL_FLIP_SLOPE_ONE + FL_H_ROUND_DOWN ! 771: jne short LtoR_compute_y0_from_x0 ! 772: ! 773: ; We have to special case lines that are exactly of slope 1 or -1: ! 774: ! 775: mov eax, N0 ! 776: add eax, dN ! 777: and eax, F - 1 ; eax = N1 ! 778: jz short LtoR_slope_one_check_start_point ! 779: ! 780: mov edx, M0 ! 781: add edx, dM ! 782: and edx, F - 1 ; edx = M1 ! 783: ! 784: add eax, F/2 ! 785: cmp edx, eax ; cmp M1, N1 + F/2 ! 786: jne short LtoR_slope_one_check_start_point ! 787: mov ulSlopeOneAdjustment, -1 ! 788: ! 789: LtoR_slope_one_check_start_point: ! 790: mov eax, M0 ! 791: or eax, eax ! 792: jz short LtoR_compute_y0_from_x0 ! 793: ! 794: add eax, F/2 ! 795: cmp eax, N0 ; cmp M0 + 8, N0 ! 796: jne short LtoR_compute_y0_from_x0 ! 797: ! 798: xor esi, esi ; x0 = 0 ! 799: ! 800: LtoR_compute_y0_from_x0: ! 801: ! 802: ; ecx = eqGamma_hi ! 803: ; esi = x0 ! 804: ; edi = eqGamma_lo ! 805: ! 806: mov eax, dN ! 807: mov edx, dM ! 808: ! 809: mov x0, esi ! 810: mov y0, 0 ! 811: cmp ecx, 0 ! 812: jl short LtoR_compute_x1 ! 813: ! 814: neg esi ! 815: and esi, eax ! 816: sub edx, esi ! 817: cmp edi, edx ! 818: mov edx, dM ! 819: jl short LtoR_compute_x1 ! 820: mov y0, 1 ; y0 = floor((dN * x0 + eqGamma) / dM) ! 821: ! 822: LtoR_compute_x1: ! 823: ! 824: ; Register state: ! 825: ; eax = dN ! 826: ; ebx = fl ! 827: ; ecx = garbage ! 828: ; edx = dM ! 829: ; esi = garbage ! 830: ; edi = garbage ! 831: ! 832: mov esi, M0 ! 833: add esi, edx ! 834: mov ecx, esi ! 835: shr esi, FLOG2 ! 
836: dec esi ; x1 = ((M0 + dM) >> 4) - 1 ! 837: add esi, ulSlopeOneAdjustment ! 838: and ecx, F-1 ; M1 = (M0 + dM) & 15 ! 839: jz done_first_pel_last_pel ! 840: ! 841: add eax, N0 ! 842: and eax, F-1 ; N1 = (N0 + dN) & 15 ! 843: jnz short LtoR_N1_not_zero ! 844: ! 845: .errnz FL_H_ROUND_DOWN - 80h ! 846: ror bl, 8 ! 847: sbb ecx, -F/2 ! 848: shr ecx, FLOG2 ; ecx = LROUND(M1, fl & FL_ROUND_DOWN) ! 849: add esi, ecx ! 850: jmp done_first_pel_last_pel ! 851: ! 852: LtoR_N1_not_zero: ! 853: sub eax, F/2 ! 854: sbb edx, edx ! 855: xor eax, edx ! 856: sub eax, edx ! 857: cmp eax, ecx ! 858: jg done_first_pel_last_pel ! 859: inc esi ! 860: jmp done_first_pel_last_pel ! 861: ! 862: ;-----------------------------------------------------------------------; ! 863: ; Figure out which pixels are at the ends of a right-to-left line. ; ! 864: ; <-------- ; ! 865: ;-----------------------------------------------------------------------; ! 866: ! 867: ; Compute x0: ! 868: ! 869: public line_runs_right_to_left ! 870: line_runs_right_to_left: ! 871: mov x0, 1 ; x0 = 1 ! 872: or eax, eax ! 873: jnz short RtoL_N0_not_zero ! 874: ! 875: xor edx, edx ; ulDelta = 0 ! 876: .errnz FL_H_ROUND_DOWN - 80h ! 877: ror bl, 8 ! 878: sbb esi, -F/2 ! 879: shr esi, FLOG2 ; esi = LROUND(M0, fl & FL_H_ROUND_DOWN) ! 880: jz short RtoL_check_slope_one ! 881: ! 882: mov x0, 2 ! 883: mov edx, dN ! 884: jmp short RtoL_check_slope_one ! 885: ! 886: RtoL_N0_not_zero: ! 887: sub eax, F/2 ! 888: sbb edx, edx ! 889: xor eax, edx ! 890: sub eax, edx ! 891: add eax, esi ; eax = ABS(N0 - F/2) + M0 ! 892: xor edx, edx ; ulDelta = 0 ! 893: cmp eax, F ! 894: jle short RtoL_check_slope_one ! 895: ! 896: mov x0, 2 ; x0 = 2 ! 897: mov edx, dN ; ulDelta = dN ! 898: ! 899: public RtoL_check_slope_one ! 900: RtoL_check_slope_one: ! 901: mov ulSlopeOneAdjustment, 0 ! 902: mov eax, ebx ! 903: and eax, FL_FLIP_SLOPE_ONE + FL_H_ROUND_DOWN ! 904: cmp eax, FL_FLIP_SLOPE_ONE ! 905: jne short RtoL_compute_y0_from_x0 ! 906: ! 
907: ; We have to special case lines that are exactly of slope 1 or -1: ! 908: ! 909: mov eax, N0 ! 910: add eax, dN ! 911: and eax, F - 1 ; eax = N1 ! 912: jz short RtoL_slope_one_check_start_point ! 913: ! 914: mov esi, M0 ! 915: add esi, dM ! 916: and esi, F - 1 ; esi = M1 ! 917: ! 918: add eax, F/2 ! 919: cmp esi, eax ; cmp M1, N1 + F/2 ! 920: jne short RtoL_slope_one_check_start_point ! 921: mov ulSlopeOneAdjustment, 1 ! 922: ! 923: RtoL_slope_one_check_start_point: ! 924: mov eax, M0 ! 925: or eax, eax ! 926: jz short RtoL_compute_y0_from_x0 ! 927: ! 928: add eax, F/2 ! 929: cmp eax, N0 ; cmp M0 + 8, N0 ! 930: jne short RtoL_compute_y0_from_x0 ! 931: ! 932: mov x0, 2 ; x0 = 2 ! 933: mov edx, dN ; ulDelta = dN ! 934: ! 935: RtoL_compute_y0_from_x0: ! 936: ! 937: ; eax = garbage ! 938: ; ebx = fl ! 939: ; ecx = eqGamma_hi ! 940: ; edx = ulDelta ! 941: ; esi = garbage ! 942: ; edi = eqGamma_lo ! 943: ! 944: mov eax, dN ; eax = dN ! 945: mov y0, 0 ; y0 = 0 ! 946: ! 947: add edi, edx ! 948: adc ecx, 0 ; eqGamma += ulDelta ! 949: ; NOTE: Setting flags here! ! 950: mov edx, dM ; edx = dM ! 951: jl short RtoL_compute_x1 ; NOTE: Looking at the flags here! ! 952: jg short RtoL_y0_is_2 ! 953: ! 954: lea ecx, [edx + edx] ! 955: sub ecx, eax ; ecx = 2 * dM - dN ! 956: cmp edi, ecx ! 957: jge short RtoL_y0_is_2 ! 958: ! 959: sub ecx, edx ; ecx = dM - dN ! 960: cmp edi, ecx ! 961: jl short RtoL_compute_x1 ! 962: ! 963: mov y0, 1 ! 964: jmp short RtoL_compute_x1 ! 965: ! 966: RtoL_y0_is_2: ! 967: mov y0, 2 ! 968: ! 969: RtoL_compute_x1: ! 970: ! 971: ; Register state: ! 972: ; eax = dN ! 973: ; ebx = fl ! 974: ; ecx = garbage ! 975: ; edx = dM ! 976: ; esi = garbage ! 977: ; edi = garbage ! 978: ! 979: mov esi, M0 ! 980: add esi, edx ! 981: mov ecx, esi ! 982: shr esi, FLOG2 ; x1 = (M0 + dM) >> 4 ! 983: add esi, ulSlopeOneAdjustment ! 984: and ecx, F-1 ; M1 = (M0 + dM) & 15 ! 985: ! 986: add eax, N0 ! 987: and eax, F-1 ; N1 = (N0 + dN) & 15 ! 
988: jnz short RtoL_N1_not_zero ! 989: ! 990: .errnz FL_H_ROUND_DOWN - 80h ! 991: ror bl, 8 ! 992: sbb ecx, -F/2 ! 993: shr ecx, FLOG2 ; ecx = LROUND(M1, fl & FL_H_ROUND_DOWN) ! 994: add esi, ecx ! 995: jmp done_first_pel_last_pel ! 996: ! 997: RtoL_N1_not_zero: ! 998: sub eax, F/2 ! 999: sbb edx, edx ! 1000: xor eax, edx ! 1001: sub eax, edx ! 1002: add eax, ecx ; eax = ABS(N1 - F/2) + M1 ! 1003: cmp eax, F+1 ! 1004: sbb esi, -1 ! 1005: ! 1006: done_first_pel_last_pel: ! 1007: ! 1008: ; Register state: ! 1009: ; eax = garbage ! 1010: ; ebx = fl ! 1011: ; ecx = garbage ! 1012: ; edx = garbage ! 1013: ; esi = x1 ! 1014: ; edi = garbage ! 1015: ! 1016: mov ecx, x0 ! 1017: lea edx, [esi + 1] ! 1018: sub edx, ecx ; edx = x1 - x0 + 1 ! 1019: ! 1020: jle next_line ! 1021: mov cStylePels, edx ! 1022: mov xStart, ecx ! 1023: ! 1024: ;-----------------------------------------------------------------------; ! 1025: ; See if clipping or styling needs to be done. ; ! 1026: ;-----------------------------------------------------------------------; ! 1027: ! 1028: testb ebx, FL_CLIP ! 1029: jnz do_some_clipping ! 1030: ! 1031: ; Register state: ! 1032: ; eax = garbage ! 1033: ; ebx = fl ! 1034: ; ecx = x0 (stack variable correct too) ! 1035: ; edx = garbage ! 1036: ; esi = x1 ! 1037: ; edi = garbage ! 1038: ! 1039: done_clipping: ! 1040: mov eax, y0 ! 1041: ! 1042: sub esi, ecx ! 1043: inc esi ; esi = cPels = x1 - x0 + 1 ! 1044: mov cPels, esi ! 1045: ! 1046: mov esi, pdsurf ! 1047: add ecx, x ; ecx = ptlStart.ptl_x ! 1048: add eax, y ; eax = ptlStart.ptl_y ! 1049: ! 1050: mov esi, [esi].dsurf_lNextScan ; we'll compute the sign of lNextScan ! 1051: ! 1052: testb ebx, FL_FLIP_D ! 1053: jz short do_v_unflip ! 1054: xchg ecx, eax ! 1055: ! 1056: do_v_unflip: ! 1057: testb ebx, FL_FLIP_V ! 1058: jz short done_unflips ! 1059: neg eax ! 1060: neg esi ! 1061: ! 1062: done_unflips: ! 1063: mov strip.ST_lNextScan, esi ; lNextScan now right for y-direction ! 1064: testb ebx, FL_STYLED ! 
1065: jnz do_some_styling ! 1066: ! 1067: done_styling: ! 1068: lea edx, [strip.ST_alStrips + (STRIP_MAX * 4)] ! 1069: mov plStripEnd, edx ! 1070: ! 1071: mov cPelsAfterThisBank, 0 ! 1072: mov cStripsInNextRun, 7fffffffh ! 1073: ! 1074: testb ebx, FL_PHYSICAL_DEVICE ! 1075: jz done_bank_setup ! 1076: ! 1077: ;-----------------------------------------------------------------------; ! 1078: ; Do banking setup. ; ! 1079: ;-----------------------------------------------------------------------; ! 1080: ! 1081: public bank_setup ! 1082: bank_setup: ! 1083: ! 1084: ; Register state: ! 1085: ; eax = ptlStart.ptl_y ! 1086: ; ebx = fl ! 1087: ; ecx = ptlStart.ptl_x ! 1088: ; edx = garbage ! 1089: ; esi = garbage ! 1090: ; edi = garbage ! 1091: ! 1092: mov esi, pdsurf ! 1093: cmp eax, [esi].dsurf_rcl1WindowClip.yTop ! 1094: jl short bank_get_initial_bank ; ptlStart.y < rcl1WindowClip.yTop ! 1095: ! 1096: cmp eax, [esi].dsurf_rcl1WindowClip.yBottom ! 1097: jl short bank_got_initial_bank ; ptlStart.y < rcl1WindowClip.yBot ! 1098: ! 1099: bank_get_initial_bank: ! 1100: mov ptlStart.ptl_y, eax ; Save ptlStart.ptl_y ! 1101: mov edi, ecx ; Save ptlStart.ptl_x ! 1102: ! 1103: .errnz JustifyTop ! 1104: .errnz JustifyBottom - 1 ! 1105: .errnz FL_FLIP_V - 8 ! 1106: ! 1107: mov ecx, ebx ; JustifyTop if line goes down, ! 1108: shr ecx, 3 ; JustifyBottom if line goes up ! 1109: and ecx, 1 ! 1110: ! 1111: bank_justified: ! 1112: ptrCall <dword ptr [esi].dsurf_pfnBankControl>, \ ! 1113: <esi, eax, ecx> ! 1114: ! 1115: mov eax, ptlStart.ptl_y ! 1116: mov ecx, edi ! 1117: ! 1118: bank_got_initial_bank: ! 1119: testb ebx, FL_FLIP_D ! 1120: jz short bank_major_x ! 1121: ! 1122: bank_major_y: ! 1123: testb ebx, FL_FLIP_V ! 1124: jz short bank_major_y_down ! 1125: bank_major_y_up: ! 1126: lea edi, [eax + 1] ! 1127: sub edi, [esi].dsurf_rcl1WindowClip.yTop ! 1128: jmp short bank_done_y_major ! 1129: bank_major_y_down: ! 1130: mov edi, [esi].dsurf_rcl1WindowClip.yBottom ! 1131: sub edi, eax ! 
1132: bank_done_y_major: ! 1133: mov esi, cPels ! 1134: sub esi, edi ; edi = cPelsInBank ! 1135: mov cPelsAfterThisBank, esi ! 1136: jle short done_bank_setup ! 1137: mov cPels, edi ! 1138: jmp short done_bank_setup ! 1139: ! 1140: bank_major_x: ! 1141: mov edi, dN ! 1142: shr edi, FLOG2 ! 1143: add edi, y ! 1144: ! 1145: ; We're guessing at the y-position of the end pixel (it's too much work ! 1146: ; to compute the actual value) to see if the line spans more than one ! 1147: ; bank. We have to add at least a slop value of '3' because the actual ! 1148: ; start pixel may be 2 off from 'y' because of end-pixel exclusiveness, ! 1149: ; and we have to add 1 more because we're taking the floor of (dN / F), to ! 1150: ; account for rounding: ! 1151: ! 1152: add edi, 3 ; yEnd = edi = y + LFLOOR(dN) + 3 ! 1153: testb ebx, FL_FLIP_V ! 1154: jz short bank_major_x_down ! 1155: bank_major_x_up: ! 1156: mov edx, 1 ! 1157: sub edx, [esi].dsurf_rcl1WindowClip.yTop ; edx = -yNextBankStart ! 1158: ! 1159: cmp edi, edx ! 1160: lea edx, [edx + eax] ; edx = cStripsInNextRun ! 1161: jl short bank_major_x_done ! 1162: ! 1163: ; Line may go over bank boundary, so don't do a half flip: ! 1164: ! 1165: or ebx, FL_DONT_DO_HALF_FLIP ! 1166: jmp short bank_major_x_done ! 1167: ! 1168: bank_major_x_down: ! 1169: mov esi, [esi].dsurf_rcl1WindowClip.yBottom ; esi = yNextBankStart ! 1170: ! 1171: mov edx, esi ! 1172: sub edx, eax ; edx = cStripsInNextRun ! 1173: ! 1174: cmp edi, esi ! 1175: jl short bank_major_x_done ! 1176: or ebx, FL_DONT_DO_HALF_FLIP ! 1177: ! 1178: bank_major_x_done: ! 1179: sub edx, STRIP_MAX ! 1180: mov cStripsInNextRun, edx ! 1181: jge short done_bank_setup ! 1182: ! 1183: lea edx, [strip.ST_alStrips + edx * 4 + (STRIP_MAX * 4)] ! 1184: mov plStripEnd, edx ! 1185: ! 1186: done_bank_setup: ! 1187: ! 1188: ;-----------------------------------------------------------------------; ! 1189: ; Setup to do DDA. ; ! 
1190: ;-----------------------------------------------------------------------; ! 1191: ! 1192: ; Register state: ! 1193: ; eax = ptlStart.ptl_y ! 1194: ; ebx = fl ! 1195: ; ecx = ptlStart.ptl_x ! 1196: ; edx = garbage ! 1197: ; esi = garbage ! 1198: ; edi = garbage ! 1199: ! 1200: mov edx, 80h ! 1201: ror dl, cl ! 1202: mov strip.ST_jBitMask, dl ; ST_jBitMask = ! 1203: ; (0x80 >> (ptlStart.ptl_x & 0x7)) ! 1204: ! 1205: mov esi, pdsurf ! 1206: mov edi, eax ; Now edi = ptlStart.ptl_y ! 1207: imul [esi].dsurf_lNextScan ! 1208: add eax, [esi].dsurf_pvBitmapStart ! 1209: sar ecx, 3 ! 1210: add eax, ecx ! 1211: mov strip.ST_pjScreen, eax ; ST_pjScreen = pchBits + ptlStart.ptl_y * ! 1212: ; cjDelta + (ptlStart.ptl_x >> 3) ! 1213: ! 1214: mov eax, dM ! 1215: mov ecx, dN ! 1216: mov esi, eqGamma_lo ! 1217: mov edi, eqGamma_hi ! 1218: ! 1219: ; Register state: ! 1220: ; eax = dM ! 1221: ; ebx = fl ! 1222: ; ecx = dN ! 1223: ; edx = garbage ! 1224: ; esi = eqGamma_lo ! 1225: ; edi = eqGamma_hi ! 1226: ! 1227: lea edx, [ecx + ecx] ; if (2 * dN > dM) ! 1228: cmp edx, eax ! 1229: mov edx, y0 ; Load y0 again ! 1230: jbe short after_half_flip ! 1231: ! 1232: test ebx, (FL_STYLED + FL_DONT_DO_HALF_FLIP) ! 1233: jnz short after_half_flip ! 1234: ! 1235: or ebx, FL_FLIP_HALF ! 1236: mov fl, ebx ! 1237: ! 1238: ; Do a half flip! ! 1239: ! 1240: not esi ! 1241: not edi ! 1242: add esi, eax ! 1243: adc edi, 0 ; eqGamma = -eqGamma - 1 + dM ! 1244: ! 1245: neg ecx ! 1246: add ecx, eax ; dN = dM - dN ! 1247: ! 1248: neg edx ! 1249: add edx, x0 ; y0 = x0 - y0 ! 1250: ! 1251: after_half_flip: ! 1252: mov strip.ST_flFlips, ebx ! 1253: and ebx, FL_STRIP_MASK ! 1254: ! 1255: .errnz FL_STRIP_SHIFT ! 1256: mov eax, apfn ! 1257: lea eax, [eax + ebx * 4] ! 1258: mov eax, [eax] ! 1259: mov pfn, eax ! 1260: mov eax, dM ! 1261: ! 1262: ; Register state: ! 1263: ; eax = dM ! 1264: ; ebx = garbage ! 1265: ; ecx = dN ! 1266: ; edx = y0 ! 1267: ; esi = eqGamma_lo ! 1268: ; edi = eqGamma_hi ! 1269: ! 
1270: or ecx, ecx ! 1271: jz short zero_slope ! 1272: ! 1273: compute_dda_stuff: ! 1274: inc edx ! 1275: mul edx ! 1276: stc ; set the carry to accomplish -1 ! 1277: sbb eax, esi ! 1278: sbb edx, edi ; (y0 + 1) * dM - eqGamma - 1 ! 1279: div ecx ! 1280: ! 1281: mov esi, eax ; esi = i ! 1282: mov edi, edx ; edi = r ! 1283: ! 1284: xor edx, edx ! 1285: mov eax, dM ! 1286: div ecx ; edx = d_R, eax = d_I ! 1287: mov d_I, eax ! 1288: ! 1289: sub esi, x0 ! 1290: inc esi ! 1291: ! 1292: done_dda_stuff: ! 1293: lea eax, [strip.ST_alStrips] ! 1294: mov ebx, cPels ! 1295: ! 1296: ;-----------------------------------------------------------------------; ! 1297: ; Do our main DDA loop. ; ! 1298: ;-----------------------------------------------------------------------; ! 1299: ! 1300: sub edi, ecx ; offset remainder term from [0..dN) ! 1301: ; to [-dN..0) so test in inner ! 1302: ; loop is quicker ! 1303: align 4 ! 1304: ! 1305: ; Register state: ! 1306: ; eax = plStrip ; current pointer into strip array ! 1307: ; ebx = cPels ; total number of pels in line ! 1308: ; ecx = dN ; delta-N = rise in line ! 1309: ; edx = d_R ; d_I + d_R/dN = exact strip length ! 1310: ; esi = i ; length of current strip ! 1311: ; edi = r ; remainder term for current strip ! 1312: ; ; in range [-dN..0) ! 1313: ! 1314: public dda_loop ! 1315: dda_loop: ! 1316: sub ebx, esi ; subtract strip length from line length ! 1317: jle final_strip ; if negative, done with line ! 1318: ! 1319: mov [eax], esi ; write strip length to strip array ! 1320: add eax, 4 ! 1321: cmp plStripEnd, eax ; is the strip array buffer full? ! 1322: jbe short output_strips ; if so, empty it ! 1323: ! 1324: ; The output_strips routine jumps to here when done: ! 1325: ! 1326: done_output_strips: ! 1327: mov esi, d_I ; our normal strip length ! 1328: add edi, edx ; adjust our remainder term ! 1329: jl short dda_loop ! 1330: ! 1331: sub edi, ecx ; our remainder became 1 or more, so ! 1332: inc esi ; we increment this strip length ! 
1333: ; and adjust the remainder term ! 1334: ! 1335: ; We've unrolled our loop a bit, so this should look familiar to the above: ! 1336: ! 1337: sub ebx, esi ; subtract strip length from line length ! 1338: jle final_strip ; if negative, done with line ! 1339: ! 1340: mov [eax], esi ; write strip length to strip array ! 1341: add eax, 4 ; adjust strip pointer ! 1342: ! 1343: ; Note that banking requires us to check if the strip array is full here ! 1344: ; too (and note that if output_strips is called it will return to ! 1345: ; done_output_strips): ! 1346: ! 1347: cmp plStripEnd, eax ! 1348: jbe short output_strips ! 1349: ! 1350: mov esi, d_I ; our normal strip length ! 1351: add edi, edx ; adjust our remainder term ! 1352: jl short dda_loop ! 1353: ! 1354: sub edi, ecx ; our remainder became 1 or more, so ! 1355: inc esi ; adjust ! 1356: jmp short dda_loop ! 1357: ! 1358: zero_slope: ! 1359: mov esi, 7fffffffh ! 1360: jmp short done_dda_stuff ! 1361: ! 1362: ;-----------------------------------------------------------------------; ! 1363: ; Empty strips buffer & possibly do x-major bank switch. ; ! 1364: ;-----------------------------------------------------------------------; ! 1365: ! 1366: output_strips: ! 1367: mov d_R, edx ! 1368: mov cPels, ebx ! 1369: mov i, esi ! 1370: mov r, edi ! 1371: mov dN, ecx ! 1372: ! 1373: lea edx, [strip] ! 1374: mov ecx, pls ! 1375: ! 1376: ; Call our strip routine: ! 1377: ! 1378: ptrCall <dword ptr pfn>, \ ! 1379: <edx, ecx, eax> ! 1380: ! 1381: ; It may be that we ran out of run in our strips buffer, and don't ! 1382: ; actually have to switch banks. See if that's the case: ! 1383: ! 1384: mov eax, cStripsInNextRun ! 1385: or eax, eax ! 1386: jg short done_strip_bank_switch ! 1387: ! 1388: ; We have to switch banks. See if we're going up or down: ! 1389: ! 1390: mov esi, pdsurf ! 1391: test fl, FL_FLIP_V ! 1392: jz short bank_x_down ! 1393: ! 1394: bank_x_up: ! 1395: mov edi, strip.ST_pjScreen ! 
1396: sub edi, [esi].dsurf_pvBitmapStart ! 1397: mov ebx, [esi].dsurf_rcl1WindowClip.yTop ! 1398: dec ebx ; we want yTop - 1 to be mapped in ! 1399: ! 1400: ; Map in the next higher bank: ! 1401: ! 1402: ptrCall <dword ptr [esi].dsurf_pfnBankControl>, \ ! 1403: <esi, ebx, JustifyBottom>; ebx, esi and edi are preserved ! 1404: ! 1405: lea eax, [ebx + 1] ! 1406: sub eax, [esi].dsurf_rcl1WindowClip.yTop ! 1407: ; eax = # of scans can do in bank ! 1408: ! 1409: add edi, [esi].dsurf_pvBitmapStart ! 1410: mov strip.ST_pjScreen, edi ! 1411: ! 1412: jmp short done_strip_bank_switch ! 1413: ! 1414: bank_x_down: ! 1415: mov edi, strip.ST_pjScreen ! 1416: sub edi, [esi].dsurf_pvBitmapStart ! 1417: mov ebx, [esi].dsurf_rcl1WindowClip.yBottom ! 1418: ! 1419: ; Map in the next lower bank: ! 1420: ! 1421: ptrCall <dword ptr [esi].dsurf_pfnBankControl>, \ ! 1422: <esi, ebx, JustifyTop> ; ebx, esi and edi are preserved ! 1423: ! 1424: mov eax, [esi].dsurf_rcl1WindowClip.yBottom ! 1425: sub eax, ebx ; eax = # scans can do in bank ! 1426: ! 1427: add edi, [esi].dsurf_pvBitmapStart ! 1428: mov strip.ST_pjScreen,edi ! 1429: ! 1430: done_strip_bank_switch: ! 1431: ! 1432: ; eax = cStripsInNextRun ! 1433: ! 1434: lea edx, [strip.ST_alStrips + (STRIP_MAX * 4)] ! 1435: sub eax, STRIP_MAX ! 1436: mov cStripsInNextRun, eax ! 1437: jge short get_ready_for_more_strips ! 1438: lea edx, [edx + eax * 4] ! 1439: ! 1440: get_ready_for_more_strips: ! 1441: mov plStripEnd, edx ! 1442: ! 1443: mov esi, i ! 1444: mov edi, r ! 1445: mov ebx, cPels ! 1446: mov edx, d_R ! 1447: mov ecx, dN ! 1448: lea eax, [strip.ST_alStrips] ! 1449: jmp done_output_strips ! 1450: ! 1451: ;-----------------------------------------------------------------------; ! 1452: ; Empty strips buffer. Either get new line or do y-major bank switch. ; ! 1453: ;-----------------------------------------------------------------------; ! 1454: ! 1455: final_strip: ! 1456: add ebx, esi ! 1457: mov [eax], ebx ! 1458: add eax, 4 ! 1459: ! 
1460: cmp cPelsAfterThisBank, 0 ! 1461: jg short bank_y_major ! 1462: ! 1463: very_final_strip: ! 1464: lea edx, [strip] ! 1465: mov ecx, pls ! 1466: ! 1467: ptrCall <dword ptr pfn>, \ ! 1468: <edx, ecx, eax> ! 1469: ! 1470: ; NOTE: next_line is jumped to from various places, and it cannot assume ! 1471: ; any registers are loaded. ! 1472: ! 1473: next_line: ! 1474: mov ebx, flStart ! 1475: testb ebx, FL_COMPLEX_CLIP ! 1476: jnz short see_if_done_complex_clipping ! 1477: ! 1478: mov edx, pptfxBuf ! 1479: cmp edx, pptfxBufEnd ! 1480: je short all_done ! 1481: ! 1482: mov esi, [edx].ptl_x ! 1483: mov ecx, [edx].ptl_y ! 1484: add edx, size POINTL ! 1485: mov pptfxBuf, edx ! 1486: mov eax, [edx].ptl_x ! 1487: mov edi, [edx].ptl_y ! 1488: jmp the_main_loop ! 1489: ! 1490: all_done: ! 1491: mov eax, 1 ! 1492: ! 1493: cRet bLines ! 1494: ! 1495: see_if_done_complex_clipping: ! 1496: mov ebx, fl ! 1497: dec cptfx ! 1498: jz short all_done ! 1499: ! 1500: and ebx, NOT FL_FLIP_HALF ; Make sure the next run doesn't have ! 1501: mov fl, ebx ; to do a half-flip if it doesn't ! 1502: ; want to ! 1503: jmp continue_complex_clipping ! 1504: ! 1505: ;-----------------------------------------------------------------------; ! 1506: ; Switch banks for a y-major line. ; ! 1507: ;-----------------------------------------------------------------------; ! 1508: ! 1509: public bank_y_major ! 1510: bank_y_major: ! 1511: mov d_R, edx ! 1512: mov i, esi ! 1513: mov r, edi ! 1514: mov dN, ecx ! 1515: sub ebx, esi ; Undo our offset ! 1516: ! 1517: bank_y_output_strips: ! 1518: lea edx, [strip] ! 1519: mov ecx, pls ! 1520: ! 1521: ptrCall <dword ptr pfn>, \ ! 1522: <edx, ecx, eax> ! 1523: ! 1524: mov esi, pdsurf ! 1525: test fl, FL_FLIP_V ! 1526: jz short bank_y_down ! 1527: ! 1528: bank_y_up: ! 1529: mov edi, strip.ST_pjScreen ! 1530: sub edi, [esi].dsurf_pvBitmapStart ! 1531: mov ecx, [esi].dsurf_rcl1WindowClip.yTop ! 1532: push ecx ! 1533: dec ecx ; we want yTop - 1 to be mapped in ! 1534: ! 
1535: ; Map in the next higher bank: ! 1536: ! 1537: ptrCall <dword ptr [esi].dsurf_pfnBankControl>, \ ! 1538: <esi, ecx, JustifyBottom>; ebx, esi and edi are preserved ! 1539: ! 1540: pop ecx ! 1541: sub ecx, [esi].dsurf_rcl1WindowClip.yTop ! 1542: ; ecx = # of scans can do in bank ! 1543: ! 1544: add edi, [esi].dsurf_pvBitmapStart ! 1545: mov strip.ST_pjScreen, edi ! 1546: ! 1547: mov edx, cPelsAfterThisBank ; edx = cPelsAfterBank ! 1548: lea eax, [strip.ST_alStrips] ; eax = plStrip ! 1549: or ebx, ebx ; ebx = cPels ! 1550: jge bank_y_done_partial_strip ! 1551: jmp short bank_y_done_switch ! 1552: ! 1553: bank_y_down: ! 1554: mov edi, strip.ST_pjScreen ! 1555: sub edi, [esi].dsurf_pvBitmapStart ! 1556: mov ecx, [esi].dsurf_rcl1WindowClip.yBottom ! 1557: push ecx ! 1558: ! 1559: ; Map in the next lower bank: ! 1560: ! 1561: ptrCall <dword ptr [esi].dsurf_pfnBankControl>, \ ! 1562: <esi, ecx, JustifyTop> ; ebx, esi and edi are preserved ! 1563: ! 1564: pop eax ! 1565: mov ecx, [esi].dsurf_rcl1WindowClip.yBottom ! 1566: sub ecx, eax ; ecx = # scans can do in bank ! 1567: ! 1568: add edi, [esi].dsurf_pvBitmapStart ! 1569: mov strip.ST_pjScreen, edi ! 1570: ! 1571: mov edx, cPelsAfterThisBank ; edx = cPelsAfterBank ! 1572: lea eax, [strip.ST_alStrips] ; eax = plStrip ! 1573: or ebx, ebx ; ebx = cPels ! 1574: jge short bank_y_done_partial_strip ! 1575: ! 1576: bank_y_done_switch: ! 1577: ! 1578: ; Handle a single strip stretching over multiple banks: ! 1579: ! 1580: test fl, FL_FLIP_HALF ! 1581: jz short bank_y_no_half_flip ! 1582: ! 1583: ; We now have to adjust for the fact that the strip drawers always leave ! 1584: ; the state ready for the next new strip (e.g., if we're doing vertical ! 1585: ; strips, it advances pjScreen one to the right after drawing each strip). ! 1586: ; But the problem is that since we crossed a bank, we have to continue the ! 1587: ; *old* strip, so we have to undo that advance: ! 1588: ! 1589: bank_y_half_flip: ! 
1590: ror strip.ST_jStyleMask, 1 ! 1591: ror strip.ST_jBitMask, 1 ! 1592: adc strip.ST_pjScreen, 0 ! 1593: jmp short bank_y_done_bit_adjust ! 1594: ! 1595: bank_y_no_half_flip: ! 1596: rol strip.ST_jStyleMask, 1 ! 1597: rol strip.ST_jBitMask, 1 ! 1598: sbb strip.ST_pjScreen, 0 ! 1599: ! 1600: bank_y_done_bit_adjust: ! 1601: mov esi, ebx ! 1602: neg esi ; esi = # pels left in strip ! 1603: ! 1604: ; eax = pointer to first strip entry ! 1605: ; ebx = negative esi ! 1606: ; ecx = # of pels we can put down in this window ! 1607: ; edx = # of pels remaining to do in line ! 1608: ; esi = # of pels left in strip ! 1609: ! 1610: ; We have three special cases to check here: ! 1611: ; ! 1612: ; 1) If the strip spans the entire next window ! 1613: ; 2) This is the last strip in the line ! 1614: ; 3) Neither of the above ! 1615: ! 1616: cmp edx,ecx ;if line shorter than bank, ! 1617: jle short bank_y_check_if_last_strip; know strip doesn't span bank ! 1618: ! 1619: cmp esi,ecx ;if line spans bank, don't have ! 1620: jl short bank_y_continue_strip ; to check if last strip ! 1621: ! 1622: ; If ((# of pels in line > window size) && (# of pels in strip > window size)) ! 1623: ; then the strip spans this bank: ! 1624: ! 1625: mov [eax], ecx ! 1626: add eax, 4 ! 1627: add ebx, ecx ! 1628: sub edx, ecx ! 1629: mov cPelsAfterThisBank, edx ! 1630: jmp bank_y_output_strips ! 1631: ! 1632: bank_y_check_if_last_strip: ! 1633: cmp esi, edx ;if strip is shorter than line, ! 1634: jl short bank_y_continue_strip ; we know this isn't the last ! 1635: ; strip ! 1636: ! 1637: ; Handle case where this is the last strip in the line and it overlaps a bank: ! 1638: ! 1639: mov [eax], edx ! 1640: add eax, 4 ! 1641: jmp very_final_strip ! 1642: ! 1643: bank_y_continue_strip: ! 1644: mov [eax], esi ! 1645: add eax, 4 ! 1646: ! 1647: bank_y_done_partial_strip: ! 1648: add ebx, edx ; cPels += cPelsAfterThisBank ! 1649: sub edx, ecx ; cPelsAfterThisBank -= cyWindow ! 1650: ! 
1651: jle short bank_y_get_ready ! 1652: sub ebx, edx ! 1653: ! 1654: bank_y_get_ready: ! 1655: mov cPelsAfterThisBank, edx ! 1656: mov edi, r ! 1657: mov edx, d_R ! 1658: mov ecx, dN ! 1659: jmp done_output_strips ! 1660: ! 1661: ;---------------------------Private-Routine-----------------------------; ! 1662: ; do_some_styling ! 1663: ; ! 1664: ; Inputs: ! 1665: ; eax = ptlStart.ptl_y ! 1666: ; ebx = fl ! 1667: ; ecx = ptlStart.ptl_x ! 1668: ; Preserves: ! 1669: ; eax, ebx, ecx ! 1670: ; Output: ! 1671: ; Exits to done_styling. ! 1672: ; ! 1673: ;-----------------------------------------------------------------------; ! 1674: ! 1675: public do_some_styling ! 1676: do_some_styling: ! 1677: mov esi, pls ! 1678: mov ptlStart.ptl_x, ecx ! 1679: ! 1680: mov edi, [esi].LS_spNext ; spThis ! 1681: mov edx, edi ! 1682: add edx, cStylePels ; spNext ! 1683: ! 1684: testb ebx, FL_ALTERNATESTYLED ! 1685: jz short do_non_alternate_style ! 1686: ! 1687: ; Do alternate styles: ! 1688: ! 1689: and edx, 1 ! 1690: mov [esi].LS_spNext, edx ! 1691: testb ebx, FL_FLIP_H ! 1692: jz short alternate_left_to_right ! 1693: ! 1694: add ecx, edx ! 1695: sub ecx, x0 ! 1696: add ecx, xStart ; ptlStart.x + spNext - x0 + xStart + 1 ! 1697: inc ecx ! 1698: jmp short compute_alternate_mask ! 1699: ! 1700: alternate_left_to_right: ! 1701: add ecx, edi ! 1702: add ecx, x0 ! 1703: sub ecx, xStart ; ptlStart.x + spThis + x0 - xStart ! 1704: ! 1705: compute_alternate_mask: ! 1706: mov strip.ST_jStyleMask, 55h ! 1707: ror strip.ST_jStyleMask, cl ! 1708: ! 1709: mov strip.ST_spRemaining, 1 ! 1710: mov strip.ST_xyDensity, 1 ! 1711: mov ecx, ptlStart.ptl_x ! 1712: jmp done_styling ! 1713: ! 1714: do_non_alternate_style: ! 1715: ! 1716: ; For styles, we don't bother to keep the style position normalized. ! 1717: ; (we do ensure that it's positive, though). If a figure is over 2 ! 1718: ; billion pels long, we'll be a pel off in our style state (oops!). ! 1719: ! 1720: and edx, 7fffffffh ! 
1721: mov [esi].LS_spNext, edx ! 1722: mov ptlStart.ptl_y, eax ! 1723: testb ebx, FL_MASKSTYLED ! 1724: jz short do_arbitrary_style ! 1725: ! 1726: ; Do mask styles: ! 1727: ! 1728: mov eax, [esi].LS_xyDensity ; Gotta copy to strips struct ! 1729: mov strip.ST_xyDensity, eax ! 1730: ! 1731: testb ebx, FL_FLIP_H ! 1732: jz short mask_left_to_right ! 1733: ! 1734: sub edx, x0 ! 1735: add edx, xStart ! 1736: add edx, 2 ! 1737: mov eax, edx ! 1738: xor edx, edx ! 1739: ! 1740: mov edi, STYLE_DENSITY ! 1741: div edi ! 1742: add ecx, eax ! 1743: inc edx ! 1744: mov eax, [esi].LS_ulStyleMaskRtoL ! 1745: jmp short compute_masked_mask ! 1746: ! 1747: mask_left_to_right: ! 1748: add edi, x0 ! 1749: sub edi, xStart ! 1750: mov eax, edi ! 1751: xor edx, edx ! 1752: mov edi, STYLE_DENSITY ! 1753: div edi ! 1754: sub ecx, eax ! 1755: neg edx ! 1756: add edx, STYLE_DENSITY ! 1757: mov eax, [esi].LS_ulStyleMaskLtoR ! 1758: ! 1759: compute_masked_mask: ! 1760: mov strip.ST_spRemaining, edx ! 1761: ror al, cl ! 1762: mov strip.ST_jStyleMask, al ! 1763: mov eax, ptlStart.ptl_y ! 1764: mov ecx, ptlStart.ptl_x ! 1765: jmp done_styling ! 1766: ! 1767: ; Do arbitrary styles: ! 1768: ! 1769: do_arbitrary_style: ! 1770: testb ebx, FL_FLIP_H ! 1771: jz short arbitrary_left_to_right ! 1772: ! 1773: sub edx, x0 ! 1774: add edx, xStart ! 1775: mov eax, edx ! 1776: xor edx, edx ! 1777: div [esi].LS_spTotal ! 1778: ! 1779: neg edx ! 1780: jge short continue_right_to_left ! 1781: add edx, [esi].LS_spTotal ! 1782: not eax ! 1783: ! 1784: continue_right_to_left: ! 1785: mov edi, dword ptr [esi].LS_jStartMask ! 1786: not edi ! 1787: mov ecx, [esi].LS_aspRtoL ! 1788: jmp short compute_arbitrary_stuff ! 1789: ! 1790: arbitrary_left_to_right: ! 1791: add edi, x0 ! 1792: sub edi, xStart ! 1793: mov eax, edi ! 1794: xor edx, edx ! 1795: div [esi].LS_spTotal ! 1796: mov edi, dword ptr [esi].LS_jStartMask ! 1797: mov ecx, [esi].LS_aspLtoR ! 1798: ! 1799: compute_arbitrary_stuff: ! 
1800: ; eax = sp / spTotal ! 1801: ; ebx = fl ! 1802: ; ecx = pspStart ! 1803: ; edx = sp % spTotal ! 1804: ; esi = pls ! 1805: ; edi = jStyleMask ! 1806: ! 1807: and eax, [esi].LS_cStyle ; if odd length style and second run ! 1808: and al, 1 ; through style array, flip the ! 1809: jz short odd_style_array_done ; meaning of the elements ! 1810: not edi ! 1811: ! 1812: odd_style_array_done: ! 1813: mov eax, [esi].LS_cStyle ! 1814: mov strip.ST_pspStart, ecx ! 1815: lea eax, [ecx + eax * 4 - 4] ! 1816: mov strip.ST_pspEnd, eax ! 1817: ! 1818: find_psp: ! 1819: sub edx, [ecx] ! 1820: jl short found_psp ! 1821: add ecx, 4 ! 1822: jmp short find_psp ! 1823: ! 1824: found_psp: ! 1825: mov strip.ST_psp, ecx ! 1826: neg edx ! 1827: mov strip.ST_spRemaining, edx ! 1828: ! 1829: sub ecx, strip.ST_pspStart ! 1830: test ecx, 4 ; size STYLEPOS ! 1831: jz short done_arbitrary ! 1832: not edi ! 1833: ! 1834: done_arbitrary: ! 1835: mov dword ptr strip.ST_jStyleMask, edi ! 1836: mov eax, ptlStart.ptl_y ! 1837: mov ecx, ptlStart.ptl_x ! 1838: jmp done_styling ! 1839: ! 1840: ;---------------------------Private-Routine-----------------------------; ! 1841: ; do_some_clipping ! 1842: ; ! 1843: ; Inputs: ! 1844: ; eax = garbage ! 1845: ; ebx = fl ! 1846: ; ecx = x0 ! 1847: ; edx = garbage ! 1848: ; esi = x1 ! 1849: ; edi = garbage ! 1850: ; ! 1851: ; Decides whether to do simple or complex clipping. ! 1852: ; ! 1853: ;-----------------------------------------------------------------------; ! 1854: ! 1855: align 4 ! 1856: ! 1857: public do_some_clipping ! 1858: do_some_clipping: ! 1859: testb ebx, FL_COMPLEX_CLIP ! 1860: jnz initialize_complex_clipping ! 1861: ! 1862: ;-----------------------------------------------------------------------; ! 1863: ; simple_clipping ! 1864: ; ! 1865: ; Inputs: ! 1866: ; ebx = fl ! 1867: ; ecx = x0 ! 1868: ; esi = x1 ! 1869: ; Output: ! 1870: ; ebx = fl ! 1871: ; ecx = new x0 (stack variable updated too) ! 1872: ; esi = new x1 ! 
1873: ; y0 stack variable updated ! 1874: ; Uses: ! 1875: ; All registers ! 1876: ; Exits: ! 1877: ; to done_clipping ! 1878: ; ! 1879: ; This routine handles clipping the line to the clip rectangle (it's ! 1880: ; faster to handle this case in the driver than to call the engine to ! 1881: ; clip for us). ! 1882: ; ! 1883: ; Fractional end-point lines complicate our lives a bit when doing ! 1884: ; clipping: ! 1885: ; ! 1886: ; 1) For styling, we must know the unclipped line's length in pels, so ! 1887: ; that we can correctly update the styling state when the line is ! 1888: ; clipped. For this reason, I do clipping after doing the hard work ! 1889: ; of figuring out which pixels are at the ends of the line (this is ! 1890: ; wasted work if the line is not styled and is completely clipped, ! 1891: ; but I think it's simpler this way). Another reason is that we'll ! 1892: ; have calculated eqGamma already, which we use for the intercept ! 1893: ; calculations. ! 1894: ; ! 1895: ; With the assumption that most lines will not be completely clipped ! 1896: ; away, this strategy isn't too painful. ! 1897: ; ! 1898: ; 2) x0, y0 are not necessarily zero, where (x0, y0) is the start pel of ! 1899: ; the line. ! 1900: ; ! 1901: ; 3) We know x0, y0 and x1, but not y1. We haven't needed to calculate ! 1902: ; y1 until now. We'll need the actual value, and not an upper bound ! 1903: ; like y1 = LFLOOR(dM) + 2 because we have to be careful when ! 1904: ; calculating x(y) that y0 <= y <= y1, otherwise we can cause an ! 1905: ; overflow on the divide (which, needless to say, is bad). ! 1906: ; ! 1907: ;-----------------------------------------------------------------------; ! 1908: ! 1909: public simple_clipping ! 1910: simple_clipping: ! 1911: mov edi, prclClip ; get pointer to normalized clip rect ! 1912: and ebx, FL_RECTLCLIP_MASK ; (it's lower-right exclusive) ! 1913: ! 1914: .errnz (FL_RECTLCLIP_SHIFT - 2); ((ebx AND FL_RECTLCLIP_MASK) shr ! 
1915: .errnz (size RECTL) - 16 ; FL_RECTLCLIP_SHIFT) is our index ! 1916: lea edi, [edi + ebx*4] ; into the array of rectangles ! 1917: ! 1918: mov edx, [edi].xRight ; load the rect coordinates ! 1919: mov eax, [edi].xLeft ! 1920: mov ebx, [edi].yBottom ! 1921: mov edi, [edi].yTop ! 1922: ! 1923: ; Translate to our origin and do some quick completely clipped tests: ! 1924: ! 1925: sub edx, x ! 1926: cmp ecx, edx ! 1927: jge totally_clipped ; totally clipped if x0 >= xRight ! 1928: ! 1929: sub eax, x ! 1930: cmp esi, eax ! 1931: jl totally_clipped ; totally clipped if x1 < xLeft ! 1932: ! 1933: sub ebx, y ! 1934: cmp y0, ebx ! 1935: jge totally_clipped ; totally clipped if y0 >= yBottom ! 1936: ! 1937: sub edi, y ! 1938: ! 1939: ; Save some state: ! 1940: ! 1941: mov xClipRight, edx ! 1942: mov xClipLeft, eax ! 1943: ! 1944: cmp esi, edx ; if (x1 >= xRight) x1 = xRight - 1 ! 1945: jl short calculate_y1 ! 1946: lea esi, [edx - 1] ! 1947: ! 1948: calculate_y1: ! 1949: mov eax, esi ; y1 = (x1 * dN + eqGamma) / dM ! 1950: mul dN ! 1951: add eax, eqGamma_lo ! 1952: adc edx, eqGamma_hi ! 1953: div dM ! 1954: ! 1955: cmp edi, eax ; if (yTop > y1) clipped ! 1956: jg short totally_clipped ! 1957: ! 1958: cmp ebx, eax ; if (yBottom > y1) know x1 ! 1959: jg short x1_computed ! 1960: ! 1961: mov eax, ebx ; x1 = (yBottom * dM + eqBeta) / dN ! 1962: mul dM ! 1963: stc ! 1964: sbb eax, eqGamma_lo ! 1965: sbb edx, eqGamma_hi ! 1966: div dN ! 1967: mov esi, eax ! 1968: ! 1969: ; At this point, we've taken care of calculating the intercepts with the ! 1970: ; right and bottom edges. Now we work on the left and top edges: ! 1971: ! 1972: x1_computed: ! 1973: mov edx, y0 ! 1974: ! 1975: mov eax, xClipLeft ; don't have to compute y intercept ! 1976: cmp eax, ecx ; at left edge if line starts to ! 1977: jle short top_intercept ; right of left edge ! 1978: ! 1979: mov ecx, eax ; x0 = xLeft ! 1980: mul dN ; y0 = (xLeft * dN + eqGamma) / dM ! 1981: add eax, eqGamma_lo ! 
1982: adc edx, eqGamma_hi ! 1983: div dM ! 1984: ! 1985: cmp ebx, eax ; if (yBottom <= y0) clipped ! 1986: jle short totally_clipped ! 1987: ! 1988: mov edx, eax ! 1989: mov y0, eax ! 1990: ! 1991: top_intercept: ! 1992: mov ebx, fl ; get ready to leave ! 1993: mov x0, ecx ! 1994: ! 1995: cmp edi, edx ; if (yTop <= y0) done clipping ! 1996: jle done_clipping ! 1997: ! 1998: mov eax, edi ; x0 = (yTop * dM + eqBeta) / dN + 1 ! 1999: mul dM ! 2000: stc ! 2001: sbb eax, eqGamma_lo ! 2002: sbb edx, eqGamma_hi ! 2003: div dN ! 2004: lea ecx, [eax + 1] ! 2005: ! 2006: cmp xClipRight, ecx ; if (xRight <= x0) clipped ! 2007: jle short totally_clipped ! 2008: ! 2009: mov y0, edi ; y0 = yTop ! 2010: mov x0, ecx ! 2011: jmp done_clipping ; all done! ! 2012: ! 2013: totally_clipped: ! 2014: ! 2015: ; The line is completely clipped. See if we have to update our style state: ! 2016: ! 2017: mov ebx, fl ! 2018: testb ebx, FL_STYLED ! 2019: jz next_line ! 2020: ! 2021: ; Adjust our style state: ! 2022: ! 2023: mov esi, pls ! 2024: mov eax, [esi].LS_spNext ! 2025: add eax, cStylePels ! 2026: mov [esi].LS_spNext, eax ! 2027: ! 2028: cmp eax, [esi].LS_spTotal2 ! 2029: jb next_line ! 2030: ! 2031: ; Have to normalize first: ! 2032: ! 2033: xor edx, edx ! 2034: div [esi].LS_spTotal2 ! 2035: mov [esi].LS_spNext, edx ! 2036: ! 2037: jmp next_line ! 2038: ! 2039: ;-----------------------------------------------------------------------; ! 2040: ! 2041: initialize_complex_clipping: ! 2042: mov eax, dN ; save a copy of original dN ! 2043: mov dN_Original, eax ! 2044: ! 2045: ;---------------------------Private-Routine-----------------------------; ! 2046: ; continue_complex_clipping ! 2047: ; ! 2048: ; Inputs: ! 2049: ; ebx = fl ! 2050: ; Output: ! 2051: ; ebx = fl ! 2052: ; ecx = x0 ! 2053: ; esi = x1 ! 2054: ; Uses: ! 2055: ; All registers. ! 2056: ; Exits: ! 2057: ; to done_clipping ! 2058: ; ! 2059: ; This routine handles the necessary initialization for the next ! 
2060: ; run in the CLIPLINE structure. ! 2061: ; ! 2062: ; NOTE: This routine is jumped to from two places! ! 2063: ;-----------------------------------------------------------------------; ! 2064: ! 2065: public continue_complex_clipping ! 2066: continue_complex_clipping: ! 2067: mov edi, prun ! 2068: mov ecx, xStart ! 2069: testb ebx, FL_FLIP_H ! 2070: jz short complex_left_to_right ! 2071: ! 2072: complex_right_to_left: ! 2073: ! 2074: ; Figure out x0 and x1 for right-to-left lines: ! 2075: ! 2076: add ecx, cStylePels ! 2077: dec ecx ! 2078: mov esi, ecx ; esi = ecx = xStart + cStylePels - 1 ! 2079: sub ecx, [edi].RUN_iStop ; New x0 ! 2080: sub esi, [edi].RUN_iStart ; New x1 ! 2081: jmp short complex_reset_variables ! 2082: ! 2083: complex_left_to_right: ! 2084: ! 2085: ; Figure out x0 and x1 for left-to-right lines: ! 2086: ! 2087: mov esi, ecx ; esi = ecx = xStart ! 2088: add ecx, [edi].RUN_iStart ; New x0 ! 2089: add esi, [edi].RUN_iStop ; New x1 ! 2090: ! 2091: complex_reset_variables: ! 2092: mov x0, ecx ! 2093: ! 2094: ; The half flip mucks with some of our variables, and we have to reset ! 2095: ; them every pass. We would have to reset eqGamma too, but it never ! 2096: ; got saved to memory in its modified form. ! 2097: ! 2098: add edi, size RUN ! 2099: mov prun, edi ; Increment run pointer for next time ! 2100: ! 2101: mov edi, pls ! 2102: mov eax, [edi].LS_spComplex ! 2103: mov [edi].LS_spNext, eax ; pls->spNext = pls->spComplex ! 2104: ! 2105: mov eax, dN_Original ; dN = dN_Original ! 2106: mov dN, eax ! 2107: ! 2108: mul ecx ! 2109: add eax, eqGamma_lo ! 2110: adc edx, eqGamma_hi ; [edx:eax] = dN*x0 + eqGamma ! 2111: ! 2112: div dM ! 2113: mov y0, eax ! 2114: jmp done_clipping ! 2115: ! 2116: endProc bLines ! 2117: ! 2118: _TEXT$03 ends ! 2119: ! 2120: end
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.