ntddk/src/video/displays/vga256/i386/lines.asm - annotate

Return to lines.asm CVS log
Up to [WindowsNT SDKs] / ntddk / src / video / displays / vga256 / i386
Annotation of ntddk/src/video/displays/vga256/i386/lines.asm, revision 1.1.1.1

1.1       root        1: ;---------------------------Module-Header------------------------------;
                      2: ; Module Name: lines.asm
                      3: ;
                      4: ; Draws a set of connected polylines.
                      5: ;
                      6: ; The actual pixel-lighting code is different depending on if the lines
                      7: ; are styled/unstyled and we're doing an arbitrary ROP or set-style ROP.
                      8: ;
                      9: ; Lines are drawn from left to right.  So if a line moves from right
                     10: ; to left, the endpoints are swapped and the line is drawn from left to
                     11: ; right.
                     12: ;
                     13: ; See s3\lines.cxx for a portable version (sans simple clipping).
                     14: ;
                     15: ; Copyright (c) 1992 Microsoft Corporation
                     16: ;-----------------------------------------------------------------------;
                     17: 
                     18:         .386
                     19: 
                     20:         .model  small,c
                     21: 
                     22:         assume cs:FLAT,ds:FLAT,es:FLAT,ss:FLAT
                     23:         assume fs:nothing,gs:nothing
                     24: 
                     25:         .xlist
                     26:         include stdcall.inc             ;calling convention cmacros
                     27:         include i386\egavga.inc
                     28:         include i386\strucs.inc
                     29:         include i386\driver.inc
                     30:         include i386\lines.inc
                     31:         .list
                     32: 
                     33:         .data
                     34: 
                     35:         public gaflRoundTable                           ; visible to other modules
                     36: gaflRoundTable       label  dword                       ; 8 dwords of FL_x_ROUND_DOWN flags
                     37:         dd      FL_H_ROUND_DOWN + FL_V_ROUND_DOWN       ; no flips
                     38:         dd      FL_H_ROUND_DOWN + FL_V_ROUND_DOWN       ; D flip
                     39:         dd      FL_H_ROUND_DOWN                         ; V flip
                     40:         dd      FL_V_ROUND_DOWN                         ; D & V flip
                     41:         dd      FL_V_ROUND_DOWN                         ; slope one
                     42:         dd      0baadf00dh                              ; filler; presumably never indexed (TODO confirm)
                     43:         dd      FL_H_ROUND_DOWN                         ; slope one & V flip
                     44:         dd      0baadf00dh                              ; filler; presumably never indexed (TODO confirm)
                     45: 
                     46:         .code
                     47: 
                     48: ;--------------------------------Macro----------------------------------;
                     49: ; testb ebx, <mask>
                     50: ;
                     51: ; Emits a byte-sized TEST of bl or bh in place of a dword TEST if the mask
                     52: ; is entirely in the lo-byte or hi-byte (thus saving 3 bytes of code space).
                     53: ;
                     54: ;-----------------------------------------------------------------------;
                     55: 
                     56: TESTB   macro   targ,mask,thirdarg
                     57:         local   mask2,delta
                     58: 
                     59: ifnb <thirdarg>                                 ; A third argument is an error
                     60:         .err    TESTB mask must be enclosed in brackets!
                     61: endif
                     62: 
                     63:         delta = 0                               ; 0 selects bl, 1 selects bh
                     64:         mask2 = mask                            ; Working copy of the bits
                     65: 
                     66:         if mask2 AND 0ffff0000h
                     67:             test targ,mask                      ; If bit set in hi-word,
                     68:             exitm                               ; test entire dword
                     69:         endif
                     70: 
                     71:         if mask2 AND 0ff00h
                     72:             if mask2 AND 0ffh                   ; If bit set in lo-byte and
                     73:                 test targ,mask                  ; hi-byte, test entire dword
                     74:                 exitm
                     75:             endif
                     76: 
                     77:             mask2 = mask2 SHR 8                 ; Bits only in hi-byte: move
                     78:             delta = 1                           ; them down and select bh
                     79:         endif
                     80: 
                     81: ifidni <targ>,<EBX>                             ; Byte forms only emitted for ebx
                     82:         if delta
                     83:             test bh,mask2                       ; Bits were in 8..15
                     84:         else
                     85:             test bl,mask2                       ; Bits were in 0..7
                     86:         endif
                     87:         exitm
                     88: endif
                     89: 
                     90:         .err    Too bad TESTB doesn't support targets other than ebx!
                     91: endm
                     92: 
                     93: ;---------------------------Public-Routine------------------------------;
                     94: ; BOOL bLines(ppdev, pptfxFirst, pptfxBuf, prun, cptfx, pls,
                     95: ;        prclClip, apfn[], flStart)
                     96: ;
                     97: ; Do all the DDA calculations for lines.
                     98: ;
                     99: ; Doing Lines Right
                    100: ; -----------------
                    101: ;
                    102: ; In NT, all lines are given to the device driver in fractional
                    103: ; coordinates, in a 28.4 fixed point format.  The lower 4 bits are
                    104: ; fractional for sub-pixel positioning.
                    105: ;
                    106: ; Note that you CANNOT! just round the coordinates to integers
                    107: ; and pass the results to your favorite integer Bresenham routine!!
                    108: ; (Unless, of course, you have such a high resolution device that
                    109: ; nobody will notice -- not likely for a display device.)  The
                    110: ; fractions give a more accurate rendering of the line -- this is
                    111: ; important for things like our Bezier curves, which would have 'kinks'
                    112: ; if the points in its polyline approximation were rounded to integers.
                    113: ;
                    114: ; Unfortunately, for fractional lines there is more setup work to do
                    115: ; a DDA than for integer lines.  However, the main loop is exactly
                    116: ; the same (and can be done entirely with 32 bit math).
                    117: ;
                    118: ; If You've Got Hardware That Does Bresenham
                    119: ; ------------------------------------------
                    120: ;
                    121: ; A lot of hardware limits DDA error terms to 'n' bits.  With fractional
                    122: ; coordinates, 4 bits are given to the fractional part, letting
                    123: ; you draw in hardware only those lines that lie entirely in a 2^(n-4)
                    124: ; by 2^(n-4) pixel space.
                    125: ;
                    126: ; And you still have to correctly draw those lines with coordinates
                    127: ; outside that space!  Remember that the screen is only a viewport
                    128: ; onto a 28.4 by 28.4 space -- if any part of the line is visible
                    129: ; you MUST render it precisely, regardless of where the end points lie.
                    130: ; So even if you do it in software, somewhere you'll have to have a
                    131: ; 32 bit DDA routine.
                    132: ;
                    133: ; Our Implementation
                    134: ; ------------------
                    135: ;
                    136: ; We employ a run length slice algorithm: our DDA calculates the
                    137: ; number of pixels that are in each row (or 'strip') of pixels.
                    138: ;
                    139: ; We've separated the running of the DDA and the drawing of pixels:
                    140: ; we run the DDA for several iterations and store the results in
                    141: ; a 'strip' buffer (which are the lengths of consecutive pixel rows of
                    142: ; the line), then we crank up a 'strip drawer' that will draw all the
                    143: ; strips in the buffer.
                    144: ;
                    145: ; We also employ a 'half-flip' to reduce the number of strip
                    146: ; iterations we need to do in the DDA and strip drawing loops: when a
                    147: ; (normalized) line's slope is more than 1/2, we do a final flip
                    148: ; about the line y = (1/2)x.  So now, instead of each strip being
                    149: ; consecutive horizontal or vertical pixel rows, each strip is composed
                    150: ; of those pixels aligned in 45 degree rows.  So a line like (0, 0) to
                    151: ; (128, 128) would generate only one strip.
                    152: ;
                    153: ; We also always draw only left-to-right.
                    154: ;
                    155: ; Styled lines may have arbitrary style patterns.  We specially
                    156: ; optimize the default patterns (and call them 'masked' styles).
                    157: ;
                    158: ; The DDA Derivation
                    159: ; ------------------
                    160: ;
                    161: ; Here is how I like to think of the DDA calculation.
                    162: ;
                    163: ; We employ Knuth's "diamond rule": rendering a one-pixel-wide line
                    164: ; can be thought of as dragging a one-pixel-wide by one-pixel-high
                    165: ; diamond along the true line.  Pixel centers lie on the integer
                    166: ; coordinates, and so we light any pixel whose center gets covered
                    167: ; by the "drag" region (John D. Hobby, Journal of the Association
                    168: ; for Computing Machinery, Vol. 36, No. 2, April 1989, pp. 209-229).
                    169: ;
                    170: ; We must define which pixel gets lit when the true line falls
                    171: ; exactly half-way between two pixels.  In this case, we follow
                    172: ; the rule: when two pels are equidistant, the upper or left pel
                    173: ; is illuminated, unless the slope is exactly one, in which case
                    174: ; the upper or right pel is illuminated.  (So we make the edges
                    175: ; of the diamond exclusive, except for the top and left vertices,
                    176: ; which are inclusive, unless we have slope one.)
                    177: ;
                    178: ; This metric decides what pixels should be on any line BEFORE it is
                    179: ; flipped around for our calculation.  Having a consistent metric
                    180: ; this way will let our lines blend nicely with our curves.  The
                    181: ; metric also dictates that we will never have one pixel turned on
                    182: ; directly above another that's turned on.  We will also never have
                    183: ; a gap; i.e., there will be exactly one pixel turned on for each
                    184: ; column between the start and end points.  All that remains to be
                    185: ; done is to decide how many pixels should be turned on for each row.
                    186: ;
                    187: ; So lines we draw will consist of varying numbers of pixels on
                    188: ; successive rows, for example:
                    189: ;
                    190: ;       ******
                    191: ;             *****
                    192: ;                  ******
                    193: ;                        *****
                    194: ;
                    195: ; We'll call each set of pixels on a row a "strip".
                    196: ;
                    197: ; (Please remember that our coordinate space has the origin as the
                    198: ; upper left pixel on the screen; positive y is down and positive x
                    199: ; is right.)
                    200: ;
                    201: ; Device coordinates are specified as fixed point 28.4 numbers,
                    202: ; where the first 28 bits are the integer coordinate, and the last
                    203: ; 4 bits are the fraction.  So coordinates may be thought of as
                    204: ; having the form (x, y) = (M/F, N/F) where F is the constant scaling
                    205: ; factor F = 2^4 = 16, and M and N are 32 bit integers.
                    206: ;
                    207: ; Consider the line from (M0/F, N0/F) to (M1/F, N1/F) which runs
                    208: ; left-to-right and whose slope is in the first octant, and let
                    209: ; dM = M1 - M0 and dN = N1 - N0.  Then dM >= 0, dN >= 0 and dM >= dN.
                    210: ;
                    211: ; Since the slope of the line is less than 1, the edges of the
                    212: ; drag region are created by the top and bottom vertices of the
                    213: ; diamond.  At any given pixel row y of the line, we light those
                    214: ; pixels whose centers are between the left and right edges.
                    215: ;
                    216: ; Let mL(n) denote the line representing the left edge of the drag
                    217: ; region.  On pixel row j, the column of the first pixel to be
                    218: ; lit is
                    219: ;
                    220: ;       iL(j) = ceiling( mL(j * F) / F)
                    221: ;
                    222: ; Since the line's slope is less than one:
                    223: ;
                    224: ;       iL(j) = ceiling( mL([j + 1/2] F) / F )
                    225: ;
                    226: ; Recall the formula for our line:
                    227: ;
                    228: ;       n(m) = (dN / dM) (m - M0) + N0
                    229: ;
                    230: ;       m(n) = (dM / dN) (n - N0) + M0
                    231: ;
                    232: ; Since the line's slope is less than one, the line representing
                    233: ; the left edge of the drag region is the original line offset
                    234: ; by 1/2 pixel in the y direction:
                    235: ;
                    236: ;       mL(n) = (dM / dN) (n - F/2 - N0) + M0
                    237: ;
                    238: ; From this we can figure out the column of the first pixel that
                    239: ; will be lit on row j, being careful of rounding (if the left
                    240: ; edge lands exactly on an integer point, the pixel at that
                    241: ; point is not lit because of our rounding convention):
                    242: ;
                    243: ;       iL(j) = floor( mL(j F) / F ) + 1
                    244: ;
                    245: ;             = floor( ((dM / dN) (j F - F/2 - N0) + M0) / F ) + 1
                    246: ;
                    247: ;             = floor( (F dM j - F/2 dM - N0 dM + dN M0) / (F dN) ) + 1
                    248: ;
                    249: ;                      F dM j - [ dM (N0 + F/2) - dN M0 ]
                    250: ;             = floor( ---------------------------------- ) + 1
                    251: ;                                   F dN
                    252: ;
                    253: ;                      dM j - [ dM (N0 + F/2) - dN M0 ] / F
                    254: ;             = floor( ------------------------------------ ) + 1       (1)
                    255: ;                                     dN
                    256: ;
                    257: ;             = floor( (dM j + alpha) / dN ) + 1
                    258: ;
                    259: ; where
                    260: ;
                    261: ;       alpha = - [ dM (N0 + F/2) - dN M0 ] / F
                    262: ;
                    263: ; We use equation (1) to calculate the DDA: there are iL(j+1) - iL(j)
                    264: ; pixels in row j.  Because we are always calculating iL(j) for
                    265: ; integer quantities of j, we note that the only fractional term
                    266: ; is constant, and so we can 'throw away' the fractional bits of
                    267: ; alpha:
                    268: ;
                    269: ;       beta = floor( - [ dM (N0 + F/2) - dN M0 ] / F )                 (2)
                    270: ;
                    271: ; so
                    272: ;
                    273: ;       iL(j) = floor( (dM j + beta) / dN ) + 1                         (3)
                    274: ;
                    275: ; for integers j.
                    276: ;
                    277: ; Note if iR(j) is the line's rightmost pixel on row j, that
                    278: ; iR(j) = iL(j + 1) - 1.
                    279: ;
                    280: ; Similarly, rewriting equation (1) as a function of column i,
                    281: ; we can determine, given column i, on which pixel row j is the line
                    282: ; lit:
                    283: ;
                    284: ;                       dN i + [ dM (N0 + F/2) - dN M0 ] / F
                    285: ;       j(i) = ceiling( ------------------------------------ ) - 1
                    286: ;                                       dM
                    287: ;
                    288: ; Floors are easier to compute, so we can rewrite this:
                    289: ;
                    290: ;                     dN i + [ dM (N0 + F/2) - dN M0 ] / F + dM - 1/F
                    291: ;       j(i) = floor( ----------------------------------------------- ) - 1
                    292: ;                                       dM
                    293: ;
                    294: ;                     dN i + [ dM (N0 + F/2) - dN M0 ] / F + dM - 1/F - dM
                    295: ;            = floor( ---------------------------------------------------- )
                    296: ;                                       dM
                    297: ;
                    298: ;                     dN i + [ dM (N0 + F/2) - dN M0 - 1 ] / F
                    299: ;            = floor( ---------------------------------------- )
                    300: ;                                       dM
                    301: ;
                    302: ; We can once again wave our hands and throw away the fractional bits
                    303: ; of the remainder term:
                    304: ;
                    305: ;       j(i) = floor( (dN i + gamma) / dM )                             (4)
                    306: ;
                    307: ; where
                    308: ;
                    309: ;       gamma = floor( [ dM (N0 + F/2) - dN M0 - 1 ] / F )              (5)
                    310: ;
                    311: ; We now note that
                    312: ;
                    313: ;       beta = -gamma - 1 = ~gamma                                      (6)
                    314: ;
                    315: ; To draw the pixels of the line, we could evaluate (3) on every scan
                    316: ; line to determine where the strip starts.  Of course, we don't want
                    317: ; to do that because that would involve a multiply and divide for every
                    318: ; scan.  So we do everything incrementally.
                    319: ;
                    320: ; We would like to easily compute c , the number of pixels on scan j:
                    321: ;                                  j
                    322: ;
                    323: ;    c  = iL(j + 1) - iL(j)
                    324: ;     j
                    325: ;
                    326: ;       = floor((dM (j + 1) + beta) / dN) - floor((dM j + beta) / dN)   (7)
                    327: ;
                    328: ; This may be rewritten as
                    329: ;
                    330: ;    c  = floor(i    + r    / dN) - floor(i  + r  / dN)                 (8)
                    331: ;     j          j+1    j+1                j    j
                    332: ;
                    333: ; where i , i    are integers and r  < dN, r    < dN.
                    334: ;        j   j+1                   j        j+1
                    335: ;
                    336: ; Rewriting (7) again:
                    337: ;
                    338: ;    c  = floor(i  + r  / dN + dM / dN) - floor(i  + r  / dN)
                    339: ;     j          j    j                          j    j
                    340: ;
                    341: ;
                    342: ;       = floor((r  + dM) / dN) - floor(r  / dN)
                    343: ;                 j                      j
                    344: ;
                    345: ; This may be rewritten as
                    346: ;
                    347: ;    c  = dI + floor((r  + dR) / dN) - floor(r  / dN)
                    348: ;     j                j                      j
                    349: ;
                    350: ; where dI + dR / dN = dM / dN, dI is an integer and dR < dN.
                    351: ;
                    352: ; r  is the remainder (or "error") term in the DDA loop: r  / dN
                    353: ;  j                                                      j
                    354: ; is the exact fraction of a pixel at which the strip ends.  To go
                    355: ; on to the next scan and compute c    we need to know r   .
                    356: ;                                  j+1                  j+1
                    357: ;
                    358: ; So in the main loop of the DDA:
                    359: ;
                    360: ;    c  = dI + floor((r  + dR) / dN) and r    = (r  + dR) % dN
                    361: ;     j                j                  j+1     j
                    362: ;
                    363: ; and we know r  < dN, r    < dN, and dR < dN.
                    364: ;              j        j+1
                    365: ;
                    366: ; We have derived the DDA only for lines in the first octant; to
                    367: ; handle other octants we do the common trick of flipping the line
                    368: ; to the first octant by first making the line left-to-right by
                    369: ; exchanging the end-points, then flipping about the lines y = 0 and
                    370: ; y = x, as necessary.  We must record the transformation so we can
                    371: ; undo them later.
                    372: ;
                    373: ; We must also be careful of how the flips affect our rounding.  If
                    374: ; to get the line to the first octant we flipped about y = 0, we now
                    375: ; have to be careful to round a y value of 1/2 up instead of down as
                    376: ; we would for a line originally in the first octant (recall that
                    377: ; "In the case where two pels are equidistant, the upper or left
                    378: ; pel is illuminated...").
                    379: ;
                    380: ; To account for this rounding when running the DDA, we shift the line
                    381: ; (or not) in the y direction by the smallest amount possible.  That
                    382: ; takes care of rounding for the DDA, but we still have to be careful
                    383: ; about the rounding when determining the first and last pixels to be
                    384: ; lit in the line.
                    385: ;
                    386: ; Determining The First And Last Pixels In The Line
                    387: ; -------------------------------------------------
                    388: ;
                    389: ; Fractional coordinates also make it harder to determine which pixels
                    390: ; will be the first and last ones in the line.  We've already taken
                    391: ; the fractional coordinates into account in calculating the DDA, but
                    392: ; the DDA cannot tell us which are the end pixels because it is quite
                    393: ; happy to calculate pixels on the line from minus infinity to positive
                    394: ; infinity.
                    395: ;
                    396: ; The diamond rule determines the start and end pixels.  (Recall that
                    397: ; the sides are exclusive except for the left and top vertices.)
                    398: ; This convention can be thought of in another way: there are diamonds
                    399: ; around the pixels, and wherever the true line crosses a diamond,
                    400: ; that pel is illuminated.
                    401: ;
                    402: ; Consider a line where we've done the flips to the first octant, and the
                    403: ; floor of the start coordinates is the origin:
                    404: ;
                    405: ;        +-----------------------> +x
                    406: ;        |
                    407: ;        | 0                     1
                    408: ;        |     0123456789abcdef
                    409: ;        |
                    410: ;        |   0 00000000?1111111
                    411: ;        |   1 00000000 1111111
                    412: ;        |   2 0000000   111111
                    413: ;        |   3 000000     11111
                    414: ;        |   4 00000    ** 1111
                    415: ;        |   5 0000       ****1
                    416: ;        |   6 000           1***
                    417: ;        |   7 00             1  ****
                    418: ;        |   8 ?                     ***
                    419: ;        |   9 22             3         ****
                    420: ;        |   a 222           33             ***
                    421: ;        |   b 2222         333                ****
                    422: ;        |   c 22222       3333                    **
                    423: ;        |   d 222222     33333
                    424: ;        |   e 2222222   333333
                    425: ;        |   f 22222222 3333333
                    426: ;        |
                    427: ;        | 2                     3
                    428: ;        v
                    429: ;        +y
                    430: ;
                    431: ; If the start of the line lands on the diamond around pixel 0 (shown by
                    432: ; the '0' region here), pixel 0 is the first pel in the line.  The same
                    433: ; is true for the other pels.
                    434: ;
                    435: ; A little more work has to be done if the line starts in the
                    436: ; 'nether-land' between the diamonds (as illustrated by the '*' line):
                    437: ; the first pel lit is the first diamond crossed by the line (pixel 1 in
                    438: ; our example).  This calculation is determined by the DDA or slope of
                    439: ; the line.
                    440: ;
                    441: ; If the line starts exactly half way between two adjacent pixels
                    442: ; (denoted here by the '?' spots), the first pixel is determined by our
                    443: ; round-down convention (and is dependent on the flips done to
                    444: ; normalize the line).
                    445: ;
                    446: ; Last Pel Exclusive
                    447: ; ------------------
                    448: ;
                    449: ; To eliminate repeatedly lit pels between continuous connected lines,
                    450: ; we employ a last-pel exclusive convention: if the line ends exactly on
                    451: ; the diamond around a pel, that pel is not lit.  (This eliminates the
                    452: ; checks we had in the old code to see if we were re-lighting pels.)
                    453: ;
                    454: ; The Half Flip
                    455: ; -------------
                    456: ;
                    457: ; To make our run length algorithm more efficient, we employ a "half
                    458: ; flip".  If after normalizing to the first octant, the slope is more
                    459: ; than 1/2, we subtract the y coordinate from the x coordinate.  This
                    460: ; has the effect of reflecting the coordinates through the line of slope
                    461: ; 1/2.  Note that the diagonal gets mapped into the x-axis after a half
                    462: ; flip.
                    463: ;
                    464: ; How Many Bits Do We Need, Anyway?
                    465: ; ---------------------------------
                    466: ;
                    467: ; Note that if the line is visible on your screen, you must light up
                    468: ; exactly the correct pixels, no matter where in the 28.4 x 28.4 device
                    469: ; space the end points of the line lie (meaning you must handle 32 bit
                    470: ; DDAs, although you can certainly have optimized cases for lesser DDAs).
                    471: ;
                    472: ; We move the origin to (floor(M0 / F), floor(N0 / F)), so when we
                    473: ; calculate gamma from (5), we know that 0 <= M0, N0 < F.  And we
                    474: ; are in the first octant, so dM >= dN.  Then we know that gamma can
                    475: ; be in the range [(-1/2)dM, (3/2)dM].  The DDI guarantees us that
                    476: ; valid lines will have dM and dN values at most 31 bits (unsigned)
                    477: ; of significance.  So gamma requires 33 bits of significance (we store
                    478: ; this as a 64 bit number for convenience).
                    479: ;
                    480: ; When running through the DDA loop, r  + dR can have a value in the
                    481: ;                                     j
                    482: ; range 0 <= r  + dR < 2 dN; thus the result must be a 32 bit unsigned value.
                    483: ;             j
                    484: ;
                    485: ; Testing Lines
                    486: ; -------------
                    487: ;
                    488: ; To be NT compliant, a display driver must exactly adhere to GIQ,
                    489: ; which means that for any given line, the driver must light exactly
                    490: ; the same pels as does GDI.  This can be tested using the Guiman tool
                    491: ; provided elsewhere in the DDK, and 'ZTest', which draws random lines
                    492: ; on the screen and to a bitmap, and compares the results.
                    493: ;
                    494: ; If You've Got Line Hardware
                    495: ; ---------------------------
                    496: ;
                    497: ; If your hardware already adheres to GIQ, you're all set.  Otherwise
                    498: ; you'll want to look at the S3 sample code and read the following:
                    499: ;
                    500: ; 1) You'll want to special case integer-only lines, since they require
                    501: ;    less processing time and are more common (CAD programs will probably
                    502: ;    only ever give integer lines).  GDI does not provide a flag saying
                    503: ;    that all lines in a path are integer lines; consequently, you will
                    504: ;    have to explicitly check every line.
                    505: ;
                    506: ; 2) You are required to correctly draw any line in the 28.4 device
                    507: ;    space that intersects the viewport.  If you have less than 32 bits
                    508: ;    of significance in the hardware for the Bresenham terms, extremely
                    509: ;    long lines would overflow the hardware.  For such (rare) cases, you
                    510: ;    can fall back to strip-drawing code, of which there is a C version in
                    511: ;    the S3's lines.cxx (or if your display is a frame buffer, fall back
                    512: ;    to the engine).
                    513: ;
                    514: ; 3) If you can explicitly set the Bresenham terms in your hardware, you
                    515: ;    can draw non-integer lines using the hardware.  If your hardware has
                    516: ;    'n' bits of precision, you can draw GIQ lines that are up to 2^(n-5)
                    517: ;    pels long (4 bits are required for the fractional part, and one bit is
                    518: ;    used as a sign bit).  Note that integer lines don't require the 4
                    519: ;    fractional bits, so if you special case them as in 1), you can do
                    520: ;    integer lines that are up to 2^(n - 1) pels long.  See the S3's
                    521: ;    fastline.asm for an example.
                    522: ;
                    523: ;-----------------------------------------------------------------------;
                    524: 
                    525: cProc   bLines,36,< \
                    526:     uses esi edi ebx,  \
                    527:     ppdev:     ptr,   \
                    528:     pptfxFirst: ptr,   \
                    529:     pptfxBuf:   ptr,   \
                    530:     prun:       ptr,   \
                    531:     cptfx:      dword, \
                    532:     pls:        ptr,   \
                    533:     prclClip:   ptr,   \
                    534:     apfn:       ptr,   \
                    535:     flStart:    dword  >
                    536: 
                    537: ; ppdev:     Surface data
                    538: ; pptfxFirst: Start point of first line
                    539: ; pptfxBuf:   All subsequent points
                    540: ; prun:       Array of runs if doing complex clipping
                    541: ; cptfx:      Number of points in pptfxBuf (i.e., # lines)
                    542: ; pls:        Line state
                    543: ; prclClip:   Clip rectangle if doing simple clipping
                    544: ; apfn:       Pointer to table of strip drawers
                    545: ; flStart:    Flags for all lines
                    546: 
                    547:         local cPelsAfterThisBank:    dword ; For bank switching
                    548:         local cStripsInNextRun:      dword ; For bank switching
                    549:         local pptfxBufEnd:           ptr   ; Last point in pptfxBuf
                    550:         local M0:                    dword ; Normalized x0 in device coords
                    551:         local dM:                    dword ; Delta-x in device coords
                    552:         local N0:                    dword ; Normalized y0 in device coords
                    553:         local dN:                    dword ; Delta-y in device coords
                    554:         local fl:                    dword ; Flags for current line
                    555:         local x:                     dword ; Normalized start pixel x-coord
                    556:         local y:                     dword ; Normalized start pixel y-coord
                    557:         local eqGamma_lo:            dword ; Lower 32 bits of Gamma
                    558:         local eqGamma_hi:            dword ; Upper 32 bits of Gamma
                    559:         local x0:                    dword ; Start pixel x-offset
                    560:         local y0:                    dword ; Start pixel y-offset
                    561:         local ulSlopeOneAdjustment:  dword ; Special offset if line of slope 1
                    562:         local cStylePels:            dword ; # of pixels in line (before clip)
                    563:         local xStart:                dword ; Start pixel x-offset before clip
                    564:         local pfn:                   ptr   ; Pointer to strip drawing function
                    565:         local cPels:                 dword ; # pixels to be drawn (after clip)
                    566:         local i:                     dword ; # pixels in strip
                    567:         local r:                     dword ; Remainder (or "error") term
                    568:         local d_I:                   dword ; Delta-I
                    569:         local d_R:                   dword ; Delta-R
                    570:         local plStripEnd:            ptr   ; Last strip in buffer
                    571:         local ptlStart[size POINTL]: byte  ; Unnormalized start coord
                    572:         local dN_Original:           dword ; dN before half-flip
                    573:         local xClipLeft:             dword ; Left side of clip rectangle
                    574:         local xClipRight:            dword ; Right side of clip rectangle
                    575:         local strip[size STRIPS]:    byte  ; Our strip buffer
                    576: 
                    577: ; Do some initializing:
                    578: 
                    579:         mov     esi, pls
                    580:         mov     ecx, cptfx
                    581:         mov     edx, pptfxBuf
                    582:         lea     eax, [edx + ecx * (size POINTL) - (size POINTL)]
                    583:         mov     pptfxBufEnd, eax        ; pptfxBufEnd is inclusive of end point
                    584: 
                    585:         mov     eax, [esi].LS_chAndXor  ; copy chAndXor from LINESTATE to STRIPS
                    586:         mov     strip.ST_chAndXor, eax  ;   buffer
                    587: 
                    588:         mov     eax, [edx].ptl_x        ; Load up end point (M1, N1)
                    589:         mov     edi, [edx].ptl_y
                    590: 
                    591:         mov     edx, pptfxFirst         ; Load up start point (M0, N0)
                    592:         mov     esi, [edx].ptl_x
                    593:         mov     ecx, [edx].ptl_y
                    594: 
                    595:         mov     ebx, flStart
                    596: 
                    597: ;-----------------------------------------------------------------------;
                    598: ; Flip to the first octant.                                             ;
                    599: ;-----------------------------------------------------------------------;
                    600: 
                    601: ; Register state:       esi = M0
                    602: ;                       ecx = N0
                    603: ;                       eax = dM (M1)
                    604: ;                       edi = dN (N1)
                    605: ;                       ebx = fl
                    606: 
                    607: ; Make sure we go left to right:
                    608: 
                    609:         public  the_main_loop
                    610: the_main_loop:
                    611:         cmp     esi, eax
                    612:         jle     short is_left_to_right  ; skip if M0 <= M1
                    613:         xchg    esi, eax                ; swap M0, M1
                    614:         xchg    ecx, edi                ; swap N0, N1
                    615:         or      ebx, FL_FLIP_H
                    616: 
                    617: is_left_to_right:
                    618: 
                    619: ; Compute the deltas, remembering that the DDI says we should get
                    620: ; deltas less than 2^31.  If we get more, we ensure we don't crash
                    621: ; later on by simply skipping the line:
                    622: 
                    623:         sub     eax, esi                ; eax = dM
                    624:         jo      next_line               ; dM must be less than 2^31
                    625:         sub     edi, ecx                ; edi = dN
                    626:         jo      next_line               ; dN must be less than 2^31
                    627: 
                    628:         jge     short is_top_to_bottom  ; skip if dN >= 0
                    629:         neg     ecx                     ; N0 = -N0
                    630:         neg     edi                     ; N1 = -N1
                    631:         or      ebx, FL_FLIP_V
                    632: 
                    633: is_top_to_bottom:
                    634:         cmp     edi, eax
                    635:         jb      short done_flips        ; skip if dN < dM
                    636:         jne     short slope_more_than_one
                    637: 
                    638: ; We must special case slopes of one (because of our rounding convention):
                    639: 
                    640:         or      ebx, FL_FLIP_SLOPE_ONE
                    641:         jmp     short done_flips
                    642: 
                    643: slope_more_than_one:
                    644:         xchg    eax, edi                ; swap dM, dN
                    645:         xchg    esi, ecx                ; swap M0, N0
                    646:         or      ebx, FL_FLIP_D
                    647: 
                    648: done_flips:
                    649: 
                    650:         mov     edx, ebx
                    651:         and     edx, FL_ROUND_MASK
                    652:         .errnz  FL_ROUND_SHIFT - 2
                    653:         or      ebx, [gaflRoundTable + edx]  ; get our rounding flags
                    654: 
                    655:         mov     dM, eax                 ; save some info
                    656:         mov     dN, edi
                    657:         mov     fl, ebx
                    658: 
                    659: ; We're going to shift our origin so that it's at the closest integer
                    660: ; coordinate to the left/above our fractional start point (it makes
                    661: ; the math quicker):
                    662: 
                    663:         mov     edx, esi                ; x = LFLOOR(M0)
                    664:         sar     edx, FLOG2
                    665:         mov     x, edx
                    666: 
                    667:         mov     edx, ecx                ; y = LFLOOR(N0)
                    668:         sar     edx, FLOG2
                    669:         mov     y, edx
                    670: 
                    671: ;-----------------------------------------------------------------------;
                    672: ; Compute the fractional remainder term                                 ;
                    673: ;-----------------------------------------------------------------------;
                    674: 
                    675: ; By shifting the origin we've contrived to eliminate the integer
                    676: ; portion of our fractional start point, giving us start point
                    677: ; fractional coordinates in the range [0, F - 1]:
                    678: 
                    679:         and     esi, F - 1              ; M0 = FXFRAC(M0)
                    680:         and     ecx, F - 1              ; N0 = FXFRAC(N0)
                    681: 
                    682: ; We now compute Gamma:
                    683: 
                    684:         mov     M0, esi                 ; save M0, N0 for later
                    685:         mov     N0, ecx
                    686: 
                    687:         lea     edx, [ecx + F/2]
                    688:         mul     edx                     ; [edx:eax] = dM * (N0 + F/2)
                    689:         xchg    eax, edi
                    690:         mov     ecx, edx                ; [ecx:edi] = dM * (N0 + F/2)
                    691:                                         ; (we just nuked N0)
                    692: 
                    693:         mul     esi                     ; [edx:eax] = dN * M0
                    694: 
                    695: ; Now gamma = dM * (N0 + F/2) - dN * M0 - bRoundDown
                    696: 
                    697:         .errnz  FL_V_ROUND_DOWN - 8000h
                    698:         ror     bh, 8
                    699:         sbb     edi, eax
                    700:         sbb     ecx, edx
                    701: 
                    702:         shrd    edi, ecx, FLOG2
                    703:         sar     ecx, FLOG2              ; gamma = [ecx:edi] >>= 4
                    704: 
                    705:         mov     eqGamma_hi, ecx
                    706:         mov     eqGamma_lo, edi
                    707: 
                    708:         mov     eax, N0
                    709: 
                    710: ; Register state:
                    711: ;                       eax = N0
                    712: ;                       ebx = fl
                    713: ;                       ecx = eqGamma_hi
                    714: ;                       edx = garbage
                    715: ;                       esi = M0
                    716: ;                       edi = eqGamma_lo
                    717: 
                    718:         testb   ebx, FL_FLIP_H
                    719:         jnz     line_runs_right_to_left
                    720: 
                    721: ;-----------------------------------------------------------------------;
                    722: ; Figure out which pixels are at the ends of a left-to-right line.      ;
                    723: ;                               -------->                               ;
                    724: ;-----------------------------------------------------------------------;
                    725: 
                    726:         public line_runs_left_to_right
                    727: line_runs_left_to_right:
                    728:         or      esi, esi
                    729:         jz      short LtoR_check_slope_one
                    730:                                         ; skip ahead if M0 == 0
                    731:                                         ;   (in that case, x0 = 0 which is to be
                    732:                                         ;   kept in esi, and is already
                    733:                                         ;   conveniently zero)
                    734: 
                    735:         or      eax, eax
                    736:         jnz     short LtoR_N0_not_zero
                    737: 
                    738:         .errnz  FL_H_ROUND_DOWN - 80h
                    739:         ror     bl, 8
                    740:         sbb     esi, -F/2
                    741:         shr     esi, FLOG2
                    742:         jmp     short LtoR_check_slope_one
                    743:                                         ; esi = x0 = rounded M0
                    744: 
                    745: LtoR_N0_not_zero:
                    746:         sub     eax, F/2
                    747:         sbb     edx, edx
                    748:         xor     eax, edx
                    749:         sub     eax, edx
                    750:         cmp     esi, eax
                    751:         sbb     esi, esi
                    752:         inc     esi                     ; esi = x0 = (abs(N0 - F/2) <= M0)
                    753: 
                    754:         public  LtoR_check_slope_one
                    755: LtoR_check_slope_one:
                    756:         mov     ulSlopeOneAdjustment, 0
                    757:         mov     eax, ebx
                    758:         and     eax, FL_FLIP_SLOPE_ONE + FL_H_ROUND_DOWN
                    759:         cmp     eax, FL_FLIP_SLOPE_ONE + FL_H_ROUND_DOWN
                    760:         jne     short LtoR_compute_y0_from_x0
                    761: 
                    762: ; We have to special case lines that are exactly of slope 1 or -1:
                    763: 
                    764:         mov     eax, N0
                    765:         add     eax, dN
                    766:         and     eax, F - 1              ; eax = N1
                    767:         jz      short LtoR_slope_one_check_start_point
                    768: 
                    769:         mov     edx, M0
                    770:         add     edx, dM
                    771:         and     edx, F - 1              ; edx = M1
                    772: 
                    773:         add     eax, F/2
                    774:         cmp     edx, eax                ; cmp M1, N1 + F/2
                    775:         jne     short LtoR_slope_one_check_start_point
                    776:         mov     ulSlopeOneAdjustment, -1
                    777: 
                    778: LtoR_slope_one_check_start_point:
                    779:         mov     eax, M0
                    780:         or      eax, eax
                    781:         jz      short LtoR_compute_y0_from_x0
                    782: 
                    783:         add     eax, F/2
                    784:         cmp     eax, N0                 ; cmp M0 + 8, N0
                    785:         jne     short LtoR_compute_y0_from_x0
                    786: 
                    787:         xor     esi, esi                ; x0 = 0
                    788: 
                    789: LtoR_compute_y0_from_x0:
                    790: 
                    791: ; ecx = eqGamma_hi
                    792: ; esi = x0
                    793: ; edi = eqGamma_lo
                    794: 
                    795:         mov     eax, dN
                    796:         mov     edx, dM
                    797: 
                    798:         mov     x0, esi
                    799:         mov     y0, 0
                    800:         cmp     ecx, 0
                    801:         jl      short LtoR_compute_x1
                    802: 
                    803:         neg     esi
                    804:         and     esi, eax
                    805:         sub     edx, esi
                    806:         cmp     edi, edx
                    807:         mov     edx, dM
                    808:         jl      short LtoR_compute_x1
                    809:         mov     y0, 1                   ; y0 = floor((dN * x0 + eqGamma) / dM)
                    810: 
                    811: LtoR_compute_x1:
                    812: 
                    813: ; Register state:
                    814: ;                       eax = dN
                    815: ;                       ebx = fl
                    816: ;                       ecx = garbage
                    817: ;                       edx = dM
                    818: ;                       esi = garbage
                    819: ;                       edi = garbage
                    820: 
                    821:         mov     esi, M0
                    822:         add     esi, edx
                    823:         mov     ecx, esi
                    824:         shr     esi, FLOG2
                    825:         dec     esi                     ; x1 = ((M0 + dM) >> 4) - 1
                    826:         add     esi, ulSlopeOneAdjustment
                    827:         and     ecx, F-1                ; M1 = (M0 + dM) & 15
                    828:         jz      done_first_pel_last_pel
                    829: 
                    830:         add     eax, N0
                    831:         and     eax, F-1                ; N1 = (N0 + dN) & 15
                    832:         jnz     short LtoR_N1_not_zero
                    833: 
                    834:         .errnz  FL_H_ROUND_DOWN - 80h
                    835:         ror     bl, 8
                    836:         sbb     ecx, -F/2
                    837:         shr     ecx, FLOG2              ; ecx = LROUND(M1, fl & FL_ROUND_DOWN)
                    838:         add     esi, ecx
                    839:         jmp     done_first_pel_last_pel
                    840: 
                    841: LtoR_N1_not_zero:
                    842:         sub     eax, F/2
                    843:         sbb     edx, edx
                    844:         xor     eax, edx
                    845:         sub     eax, edx
                    846:         cmp     eax, ecx
                    847:         jg      done_first_pel_last_pel
                    848:         inc     esi
                    849:         jmp     done_first_pel_last_pel
                    850: 
                    851: ;-----------------------------------------------------------------------;
                    852: ; Figure out which pixels are at the ends of a right-to-left line.      ;
                    853: ;                               <--------                               ;
                    854: ;-----------------------------------------------------------------------;
                    855: 
                    856: ; Compute x0:
                    857: 
                    858:         public  line_runs_right_to_left
                    859: line_runs_right_to_left:
                    860:         mov     x0, 1                   ; x0 = 1
                    861:         or      eax, eax
                    862:         jnz     short RtoL_N0_not_zero
                    863: 
                    864:         xor     edx, edx                ; ulDelta = 0
                    865:         .errnz  FL_H_ROUND_DOWN - 80h
                    866:         ror     bl, 8
                    867:         sbb     esi, -F/2
                    868:         shr     esi, FLOG2              ; esi = LROUND(M0, fl & FL_H_ROUND_DOWN)
                    869:         jz      short RtoL_check_slope_one
                    870: 
                    871:         mov     x0, 2
                    872:         mov     edx, dN
                    873:         jmp     short RtoL_check_slope_one
                    874: 
                    875: RtoL_N0_not_zero:
                    876:         sub     eax, F/2
                    877:         sbb     edx, edx
                    878:         xor     eax, edx
                    879:         sub     eax, edx
                    880:         add     eax, esi                ; eax = ABS(N0 - F/2) + M0
                    881:         xor     edx, edx                ; ulDelta = 0
                    882:         cmp     eax, F
                    883:         jle     short RtoL_check_slope_one
                    884: 
                    885:         mov     x0, 2                   ; x0 = 2
                    886:         mov     edx, dN                 ; ulDelta = dN
                    887: 
                    888:         public  RtoL_check_slope_one
                    889: RtoL_check_slope_one:
                    890:         mov     ulSlopeOneAdjustment, 0
                    891:         mov     eax, ebx
                    892:         and     eax, FL_FLIP_SLOPE_ONE + FL_H_ROUND_DOWN
                    893:         cmp     eax, FL_FLIP_SLOPE_ONE
                    894:         jne     short RtoL_compute_y0_from_x0
                    895: 
                    896: ; We have to special case lines that are exactly of slope 1 or -1:
                    897: 
                    898:         mov     eax, N0
                    899:         add     eax, dN
                    900:         and     eax, F - 1              ; eax = N1
                    901:         jz      short RtoL_slope_one_check_start_point
                    902: 
                    903:         mov     esi, M0
                    904:         add     esi, dM
                    905:         and     esi, F - 1              ; esi = M1
                    906: 
                    907:         add     eax, F/2
                    908:         cmp     esi, eax                ; cmp M1, N1 + F/2
                    909:         jne     short RtoL_slope_one_check_start_point
                    910:         mov     ulSlopeOneAdjustment, 1
                    911: 
                    912: RtoL_slope_one_check_start_point:
                    913:         mov     eax, M0
                    914:         or      eax, eax
                    915:         jz      short RtoL_compute_y0_from_x0
                    916: 
                    917:         add     eax, F/2
                    918:         cmp     eax, N0                 ; cmp M0 + 8, N0
                    919:         jne     short RtoL_compute_y0_from_x0
                    920: 
                    921:         mov     x0, 2                   ; x0 = 2
                    922:         mov     edx, dN                 ; ulDelta = dN
                    923: 
                    924: RtoL_compute_y0_from_x0:
                    925: 
                    926: ; eax = garbage
                    927: ; ebx = fl
                    928: ; ecx = eqGamma_hi
                    929: ; edx = ulDelta
                    930: ; esi = garbage
                    931: ; edi = eqGamma_lo
                    932: 
                    933:         mov     eax, dN                 ; eax = dN
                    934:         mov     y0, 0                   ; y0 = 0
                    935: 
                    936:         add     edi, edx
                    937:         adc     ecx, 0                  ; eqGamma += ulDelta
                    938:                                         ; NOTE: Setting flags here!
                    939:         mov     edx, dM                 ; edx = dM
                    940:         jl      short RtoL_compute_x1   ; NOTE: Looking at the flags here!
                    941:         jg      short RtoL_y0_is_2
                    942: 
                    943:         lea     ecx, [edx + edx]
                    944:         sub     ecx, eax                ; ecx = 2 * dM - dN
                    945:         cmp     edi, ecx
                    946:         jge     short RtoL_y0_is_2
                    947: 
                    948:         sub     ecx, edx                ; ecx = dM - dN
                    949:         cmp     edi, ecx
                    950:         jl      short RtoL_compute_x1
                    951: 
                    952:         mov     y0, 1
                    953:         jmp     short RtoL_compute_x1
                    954: 
                    955: RtoL_y0_is_2:
                    956:         mov     y0, 2
                    957: 
                    958: RtoL_compute_x1:
                    959: 
                    960: ; Register state:
                    961: ;                       eax = dN
                    962: ;                       ebx = fl
                    963: ;                       ecx = garbage
                    964: ;                       edx = dM
                    965: ;                       esi = garbage
                    966: ;                       edi = garbage
                    967: 
                    968:         mov     esi, M0
                    969:         add     esi, edx
                    970:         mov     ecx, esi
                    971:         shr     esi, FLOG2              ; x1 = (M0 + dM) >> 4
                    972:         add     esi, ulSlopeOneAdjustment
                    973:         and     ecx, F-1                ; M1 = (M0 + dM) & 15
                    974: 
                    975:         add     eax, N0
                    976:         and     eax, F-1                ; N1 = (N0 + dN) & 15
                    977:         jnz     short RtoL_N1_not_zero
                    978: 
                    979:         .errnz  FL_H_ROUND_DOWN - 80h
                    980:         ror     bl, 8
                    981:         sbb     ecx, -F/2
                    982:         shr     ecx, FLOG2              ; ecx = LROUND(M1, fl & FL_ROUND_DOWN)
                    983:         add     esi, ecx
                    984:         jmp     done_first_pel_last_pel
                    985: 
                    986: RtoL_N1_not_zero:
                    987:         sub     eax, F/2
                    988:         sbb     edx, edx
                    989:         xor     eax, edx
                    990:         sub     eax, edx
                    991:         add     eax, ecx                ; eax = ABS(N1 - F/2) + M1
                    992:         cmp     eax, F+1
                    993:         sbb     esi, -1
                    994: 
                    995: done_first_pel_last_pel:
                    996: 
                    997: ; Register state:
                    998: ;                       eax = garbage
                    999: ;                       ebx = fl
                   1000: ;                       ecx = garbage
                   1001: ;                       edx = garbage
                   1002: ;                       esi = x1
                   1003: ;                       edi = garbage
                   1004: 
                   1005:         mov     ecx, x0
                   1006:         lea     edx, [esi + 1]
                   1007:         sub     edx, ecx                ; edx = x1 - x0 + 1
                   1008: 
                   1009:         jle     next_line
                   1010:         mov     cStylePels, edx
                   1011:         mov     xStart, ecx
                   1012: 
                   1013: ;-----------------------------------------------------------------------;
                   1014: ; See if clipping or styling needs to be done.                          ;
                   1015: ;-----------------------------------------------------------------------;
                   1016: 
                   1017:         testb   ebx, FL_CLIP
                   1018:         jnz     do_some_clipping
                   1019: 
                   1020: ; Register state:
                   1021: ;                       eax = garbage
                   1022: ;                       ebx = fl
                   1023: ;                       ecx = x0        (stack variable correct too)
                   1024: ;                       edx = garbage
                   1025: ;                       esi = x1
                   1026: ;                       edi = garbage
                   1027: 
                   1028: done_clipping:
                   1029:         mov     eax, y0
                   1030: 
                   1031:         sub     esi, ecx
                   1032:         inc     esi                     ; esi = cPels = x1 - x0 + 1
                   1033:         mov     cPels, esi
                   1034: 
                   1035:         mov     esi, ppdev
                   1036:         add     ecx, x                  ; ecx = ptlStart.ptl_x
                   1037:         add     eax, y                  ; eax = ptlStart.ptl_y
                   1038: 
                   1039:         mov     esi, [esi].pdev_lNextScan ; we'll compute the sign of lNextScan
                   1040: 
                   1041:         testb   ebx, FL_FLIP_D
                   1042:         jz      short do_v_unflip
                   1043:         xchg    ecx, eax
                   1044: 
                   1045: do_v_unflip:
                   1046:         testb   ebx, FL_FLIP_V
                   1047:         jz      short done_unflips
                   1048:         neg     eax
                   1049:         neg     esi
                   1050: 
                   1051: done_unflips:
                   1052:         mov     strip.ST_lNextScan, esi ; lNextScan now right for y-direction
                   1053:         testb   ebx, FL_STYLED
                   1054:         jnz     do_some_styling
                   1055: 
                   1056: done_styling:
                   1057:         lea     edx, [strip.ST_alStrips + (STRIP_MAX * 4)]
                   1058:         mov     plStripEnd, edx
                   1059: 
                   1060:         mov     cPelsAfterThisBank, 0
                   1061:         mov     cStripsInNextRun, 7fffffffh
                   1062: 
                   1063: ;-----------------------------------------------------------------------;
                   1064: ; Do banking setup.                                                     ;
                   1065: ;-----------------------------------------------------------------------;
                   1066: 
                   1067:         public  bank_setup
                   1068: bank_setup:
                   1069: 
                   1070: ; Register state:
                   1071: ;                       eax = ptlStart.ptl_y
                   1072: ;                       ebx = fl
                   1073: ;                       ecx = ptlStart.ptl_x
                   1074: ;                       edx = garbage
                   1075: ;                       esi = garbage
                   1076: ;                       edi = garbage
                   1077: 
                   1078:         mov     esi, ppdev
                   1079:         cmp     eax, [esi].pdev_rcl1WindowClip.yTop
                   1080:         jl      short bank_get_initial_bank   ; ptlStart.y < rcl1WindowClip.yTop
                   1081: 
                   1082:         cmp     eax, [esi].pdev_rcl1WindowClip.yBottom
                   1083:         jl      short bank_got_initial_bank   ; ptlStart.y < rcl1WindowClip.yBot
                   1084: 
                   1085: bank_get_initial_bank:
                   1086:         mov     ptlStart.ptl_y, eax     ; Save ptlStart.ptl_y
                   1087:         mov     edi, ecx                ; Save ptlStart.ptl_x
                   1088: 
                   1089:         .errnz  JustifyTop
                   1090:         .errnz  JustifyBottom - 1
                   1091:         .errnz  FL_FLIP_V - 8
                   1092: 
                   1093:         mov     ecx, ebx                ; JustifyTop if line goes down,
                   1094:         shr     ecx, 3                  ; JustifyBottom if line goes up
                   1095:         and     ecx, 1
                   1096: 
                   1097: bank_justified:
                   1098:         ptrCall <dword ptr [esi].pdev_pfnBankControl>, \
                   1099:                 <esi, eax, ecx>
                   1100: 
                   1101:         mov     eax, ptlStart.ptl_y
                   1102:         mov     ecx, edi
                   1103: 
                   1104: bank_got_initial_bank:
                   1105:         testb   ebx, FL_FLIP_D
                   1106:         jz      short bank_major_x
                   1107: 
                   1108: bank_major_y:
                   1109:         testb   ebx, FL_FLIP_V
                   1110:         jz      short bank_major_y_down
                   1111: bank_major_y_up:
                   1112:         lea     edi, [eax + 1]
                   1113:         sub     edi, [esi].pdev_rcl1WindowClip.yTop
                   1114:         jmp     short bank_done_y_major
                   1115: bank_major_y_down:
                   1116:         mov     edi, [esi].pdev_rcl1WindowClip.yBottom
                   1117:         sub     edi, eax
                   1118: bank_done_y_major:
                   1119:         mov     esi, cPels
                   1120:         sub     esi, edi                ; edi = cPelsInBank
                   1121:         mov     cPelsAfterThisBank, esi
                   1122:         jle     short done_bank_setup
                   1123:         mov     cPels, edi
                   1124:         jmp     short done_bank_setup
                   1125: 
                   1126: bank_major_x:
                   1127:         mov     edi, dN
                   1128:         shr     edi, FLOG2
                   1129:         add     edi, y
                   1130: 
                   1131: ; We're guessing at the y-position of the end pixel (it's too much work
                   1132: ; to compute the actual value) to see if the line spans more than one
                   1133: ; bank.  We have to add at least a slop value of '3' because the actual
                   1134: ; start pixel may be up to 2 off from 'y' because of end-pixel exclusiveness,
                   1135: ; and we have to add 1 more because we're taking the floor of (dN / F), to
                   1136: ; account for rounding:
                   1137: 
                   1138:         add     edi, 3                  ; yEnd = edi = y + LFLOOR(dN) + 3
                   1139:         testb   ebx, FL_FLIP_V
                   1140:         jz      short bank_major_x_down
                   1141: bank_major_x_up:
                   1142:         mov     edx, 1
                   1143:         sub     edx, [esi].pdev_rcl1WindowClip.yTop    ; edx = -yNextBankStart
                   1144: 
                   1145:         cmp     edi, edx
                   1146:         lea     edx, [edx + eax]        ; edx = cStripsInNextRun
                   1147:         jl      short bank_major_x_done
                   1148: 
                   1149: ; Line may go over bank boundary, so don't do a half flip:
                   1150: 
                   1151:         or      ebx, FL_DONT_DO_HALF_FLIP
                   1152:         jmp     short bank_major_x_done
                   1153: 
                   1154: bank_major_x_down:
                   1155:         mov     esi, [esi].pdev_rcl1WindowClip.yBottom  ; esi = yNextBankStart
                   1156: 
                   1157:         mov     edx, esi
                   1158:         sub     edx, eax                ; edx = cStripsInNextRun
                   1159: 
                   1160:         cmp     edi, esi
                   1161:         jl      short bank_major_x_done
                   1162:         or      ebx, FL_DONT_DO_HALF_FLIP
                   1163: 
                   1164: bank_major_x_done:
                   1165:         sub     edx, STRIP_MAX
                   1166:         mov     cStripsInNextRun, edx
                   1167:         jge     short done_bank_setup
                   1168: 
                   1169:         lea     edx, [strip.ST_alStrips + edx * 4 + (STRIP_MAX * 4)]
                   1170:         mov     plStripEnd, edx
                   1171: 
                   1172: done_bank_setup:
                   1173: 
                   1174: ;-----------------------------------------------------------------------;
                   1175: ; Setup to do DDA.                                                      ;
                   1176: ;-----------------------------------------------------------------------;
                   1177: 
                   1178: ; Register state:
                   1179: ;                       eax = ptlStart.ptl_y
                   1180: ;                       ebx = fl
                   1181: ;                       ecx = ptlStart.ptl_x
                   1182: ;                       edx = garbage
                   1183: ;                       esi = garbage
                   1184: ;                       edi = garbage
                   1185: 
                   1186:         mov     esi, ppdev
                   1187:         mov     edi, eax                ; Now edi = ptlStart.ptl_y
                   1188:         imul    [esi].pdev_lNextScan
                   1189:         add     eax, [esi].pdev_pvBitmapStart
                   1190:         add     eax, ecx
                   1191:         mov     strip.ST_pjScreen, eax  ; pjScreen = pchBits + ptlStart.y *
                   1192:                                         ;   cjDelta + ptlStart.x
                   1193: 
                   1194:         mov     eax, dM
                   1195:         mov     ecx, dN
                   1196:         mov     esi, eqGamma_lo
                   1197:         mov     edi, eqGamma_hi
                   1198: 
                   1199: ; Register state:
                   1200: ;                       eax = dM
                   1201: ;                       ebx = fl
                   1202: ;                       ecx = dN
                   1203: ;                       edx = garbage
                   1204: ;                       esi = eqGamma_lo
                   1205: ;                       edi = eqGamma_hi
                   1206: 
                   1207:         lea     edx, [ecx + ecx]        ; if (2 * dN > dM)
                   1208:         cmp     edx, eax
                   1209:         mov     edx, y0                 ; Load y0 again
                   1210:         jbe     short after_half_flip
                   1211: 
                   1212:         test    ebx, FL_DONT_DO_HALF_FLIP
                   1213:         jnz     short after_half_flip
                   1214: 
                   1215:         or      ebx, FL_FLIP_HALF
                   1216:         mov     fl, ebx
                   1217: 
                   1218: ; Do a half flip!
                   1219: 
                   1220:         not     esi
                   1221:         not     edi
                   1222:         add     esi, eax
                   1223:         adc     edi, 0                  ; eqGamma = -eqGamma - 1 + dM
                   1224: 
                   1225:         neg     ecx
                   1226:         add     ecx, eax                ; dN = dM - dN
                   1227: 
                   1228:         neg     edx
                   1229:         add     edx, x0                 ; y0 = x0 - y0
                   1230: 
                   1231: after_half_flip:
                   1232:         mov     strip.ST_flFlips, ebx
                   1233:         and     ebx, FL_STRIP_MASK
                   1234: 
                   1235:         .errnz  FL_STRIP_SHIFT
                   1236:         mov     eax, apfn
                   1237:         lea     eax, [eax + ebx * 4]
                   1238:         mov     eax, [eax]
                   1239:         mov     pfn, eax
                   1240:         mov     eax, dM
                   1241: 
                   1242: ; Register state:
                   1243: ;                       eax = dM
                   1244: ;                       ebx = garbage
                   1245: ;                       ecx = dN
                   1246: ;                       edx = y0
                   1247: ;                       esi = eqGamma_lo
                   1248: ;                       edi = eqGamma_hi
                   1249: 
                   1250:         or      ecx, ecx
                   1251:         jz      short zero_slope
                   1252: 
                   1253: compute_dda_stuff:
                   1254:         inc     edx
                   1255:         mul     edx
                   1256:         stc                             ; set the carry to accomplish -1
                   1257:         sbb     eax, esi
                   1258:         sbb     edx, edi                ; (y0 + 1) * dM - eqGamma - 1
                   1259:         div     ecx
                   1260: 
                   1261:         mov     esi, eax                ; esi = i
                   1262:         mov     edi, edx                ; edi = r
                   1263: 
                   1264:         xor     edx, edx
                   1265:         mov     eax, dM
                   1266:         div     ecx                     ; edx = d_R, eax = d_I
                   1267:         mov     d_I, eax
                   1268: 
                   1269:         sub     esi, x0
                   1270:         inc     esi
                   1271: 
                   1272: done_dda_stuff:
                   1273:         lea     eax, [strip.ST_alStrips]
                   1274:         mov     ebx, cPels
                   1275: 
                   1276: ;-----------------------------------------------------------------------;
                   1277: ; Do our main DDA loop.                                                 ;
                   1278: ;-----------------------------------------------------------------------;
                   1279: 
                   1280:         sub     edi, ecx                ; offset remainder term from [0..dN)
                   1281:                                         ;   to [-dN..0) so test in inner
                   1282:                                         ;   loop is quicker
                   1283:         align   4
                   1284: 
                   1285: ; Register state:
                   1286: ;                       eax = plStrip   ; current pointer into strip array
                   1287: ;                       ebx = cPels     ; total number of pels in line
                   1288: ;                       ecx = dN        ; delta-N = rise in line
                   1289: ;                       edx = d_R       ; d_I + d_R/dN = exact strip length
                   1290: ;                       esi = i         ; length of current strip
                   1291: ;                       edi = r         ; remainder term for current strip
                   1292: ;                                       ;   in range [-dN..0)
                   1293: 
                   1294:         public  dda_loop
                   1295: dda_loop:
                   1296:         sub     ebx, esi                ; subtract strip length from line length
                   1297:         jle     final_strip             ; if negative, done with line
                   1298: 
                   1299:         mov     [eax], esi              ; write strip length to strip array
                   1300:         add     eax, 4
                   1301:         cmp     plStripEnd, eax         ; is the strip array buffer full?
                   1302:         jbe     short output_strips     ; if so, empty it
                   1303: 
                   1304: ; The output_strips routine jumps to here when done:
                   1305: 
                   1306: done_output_strips:
                   1307:         mov     esi, d_I                ; our normal strip length
                   1308:         add     edi, edx                ; adjust our remainder term
                   1309:         jl      short dda_loop
                   1310: 
                   1311:         sub     edi, ecx                ; our remainder became 1 or more, so
                   1312:         inc     esi                     ;   we increment this strip length
                   1313:                                         ;   and adjust the remainder term
                   1314: 
                   1315: ; We've unrolled our loop a bit, so this should look familiar to the above:
                   1316: 
                   1317:         sub     ebx, esi                ; subtract strip length from line length
                   1318:         jle     final_strip             ; if negative, done with line
                   1319: 
                   1320:         mov     [eax], esi              ; write strip length to strip array
                   1321:         add     eax, 4                  ; adjust strip pointer
                   1322: 
                   1323: ; Note that banking requires us to check if the strip array is full here
                   1324: ; too (and note that if output_strips is called it will return to
                   1325: ; done_output_strips):
                   1326: 
                   1327:         cmp     plStripEnd, eax
                   1328:         jbe     short output_strips
                   1329: 
                   1330:         mov     esi, d_I                ; our normal strip length
                   1331:         add     edi, edx                ; adjust our remainder term
                   1332:         jl      short dda_loop
                   1333: 
                   1334:         sub     edi, ecx                ; our remainder became 1 or more, so
                   1335:         inc     esi                     ; adjust
                   1336:         jmp     short dda_loop
                   1337: 
                   1338: zero_slope:
                   1339:         mov     esi, 7fffffffh
                   1340:         jmp     short done_dda_stuff
                   1341: 
                   1342: ;-----------------------------------------------------------------------;
                   1343: ; Empty strips buffer & possibly do x-major bank switch.                ;
                   1344: ;-----------------------------------------------------------------------;
                   1345: 
                   1346: output_strips:
                   1347:         mov     d_R, edx
                   1348:         mov     cPels, ebx
                   1349:         mov     i, esi
                   1350:         mov     r, edi
                   1351:         mov     dN, ecx
                   1352: 
                   1353:         lea     edx, [strip]
                   1354:         mov     ecx, pls
                   1355: 
                   1356: ; Call our strip routine:
                   1357: 
                   1358:         ptrCall <dword ptr pfn>, \
                   1359:                 <edx, ecx, eax>
                   1360: 
                   1361: ; It may be that we ran out of room in our strips buffer, and don't
                   1362: ; actually have to switch banks.  See if that's the case:
                   1363: 
                   1364:         mov     eax, cStripsInNextRun
                   1365:         or      eax, eax
                   1366:         jg      short done_strip_bank_switch
                   1367: 
                   1368: ; We have to switch banks.  See if we're going up or down:
                   1369: 
                   1370:         mov     esi, ppdev
                   1371:         test    fl, FL_FLIP_V
                   1372:         jz      short bank_x_down
                   1373: 
                   1374: bank_x_up:
                   1375:         mov     edi, strip.ST_pjScreen
                   1376:         sub     edi, [esi].pdev_pvBitmapStart
                   1377:         mov     ebx, [esi].pdev_rcl1WindowClip.yTop
                   1378:         dec     ebx                     ; we want yTop - 1 to be mapped in
                   1379: 
                   1380: ; Map in the next higher bank:
                   1381: 
                   1382:         ptrCall <dword ptr [esi].pdev_pfnBankControl>, \
                   1383:                 <esi, ebx, JustifyBottom>; ebx, esi and edi are preserved
                   1384: 
                   1385:         lea     eax, [ebx + 1]
                   1386:         sub     eax, [esi].pdev_rcl1WindowClip.yTop
                   1387:                                         ; eax = # of scans can do in bank
                   1388: 
                   1389:         add     edi, [esi].pdev_pvBitmapStart
                   1390:         mov     strip.ST_pjScreen, edi
                   1391: 
                   1392:         jmp     short done_strip_bank_switch
                   1393: 
                   1394: bank_x_down:
                   1395:         mov     edi, strip.ST_pjScreen
                   1396:         sub     edi, [esi].pdev_pvBitmapStart
                   1397:         mov     ebx, [esi].pdev_rcl1WindowClip.yBottom
                   1398: 
                   1399: ; Map in the next lower bank:
                   1400: 
                   1401:         ptrCall <dword ptr [esi].pdev_pfnBankControl>, \
                   1402:                 <esi, ebx, JustifyTop>  ; ebx, esi and edi are preserved
                   1403: 
                   1404:         mov     eax, [esi].pdev_rcl1WindowClip.yBottom
                   1405:         sub     eax, ebx                ; eax = # scans can do in bank
                   1406: 
                   1407:         add     edi, [esi].pdev_pvBitmapStart
                   1408:         mov     strip.ST_pjScreen,edi
                   1409: 
                   1410: done_strip_bank_switch:
                   1411: 
                   1412: ; eax = cStripsInNextRun
                   1413: 
                   1414:         lea     edx, [strip.ST_alStrips + (STRIP_MAX * 4)]
                   1415:         sub     eax, STRIP_MAX
                   1416:         mov     cStripsInNextRun, eax
                   1417:         jge     short get_ready_for_more_strips
                   1418:         lea     edx, [edx + eax * 4]
                   1419: 
                   1420: get_ready_for_more_strips:
                   1421:         mov     plStripEnd, edx
                   1422: 
                   1423:         mov     esi, i
                   1424:         mov     edi, r
                   1425:         mov     ebx, cPels
                   1426:         mov     edx, d_R
                   1427:         mov     ecx, dN
                   1428:         lea     eax, [strip.ST_alStrips]
                   1429:         jmp     done_output_strips
                   1430: 
                   1431: ;-----------------------------------------------------------------------;
                   1432: ; Empty strips buffer.  Either get new line or do y-major bank switch.  ;
                   1433: ;-----------------------------------------------------------------------;
                   1434: 
                   1435: final_strip:
                   1436:         add     ebx, esi
                   1437:         mov     [eax], ebx
                   1438:         add     eax, 4
                   1439: 
                   1440:         cmp     cPelsAfterThisBank, 0
                   1441:         jg      short bank_y_major
                   1442: 
                   1443: very_final_strip:
                   1444:         lea     edx, [strip]
                   1445:         mov     ecx, pls
                   1446: 
                   1447:         ptrCall <dword ptr pfn>, \
                   1448:                 <edx, ecx, eax>
                   1449: 
                   1450: ; NOTE: next_line is jumped to from various places, and it cannot assume
                   1451: ;       any registers are loaded.
                   1452: 
                   1453: next_line:
                   1454:         mov     ebx, flStart
                   1455:         testb   ebx, FL_COMPLEX_CLIP
                   1456:         jnz     short see_if_done_complex_clipping
                   1457: 
                   1458:         mov     edx, pptfxBuf
                   1459:         cmp     edx, pptfxBufEnd
                   1460:         je      short all_done
                   1461: 
                   1462:         mov     esi, [edx].ptl_x
                   1463:         mov     ecx, [edx].ptl_y
                   1464:         add     edx, size POINTL
                   1465:         mov     pptfxBuf, edx
                   1466:         mov     eax, [edx].ptl_x
                   1467:         mov     edi, [edx].ptl_y
                   1468:         jmp     the_main_loop
                   1469: 
                   1470: all_done:
                   1471:         mov     eax, 1
                   1472: 
                   1473:         cRet    bLines
                   1474: 
                   1475: see_if_done_complex_clipping:
                   1476:         mov     ebx, fl
                   1477:         dec     cptfx
                   1478:         jz      short all_done
                   1479: 
                   1480:         and     ebx, NOT FL_FLIP_HALF   ; Make sure the next run doesn't have
                   1481:         mov     fl, ebx                 ;   to do a half-flip if it doesn't
                   1482:                                         ;   want to
                   1483:         jmp     continue_complex_clipping
                   1484: 
                   1485: ;-----------------------------------------------------------------------;
                   1486: ; Switch banks for a y-major line.                                      ;
                   1487: ;-----------------------------------------------------------------------;
                   1488: 
                   1489:         public  bank_y_major
                   1490: bank_y_major:
                   1491:         mov     d_R, edx
                   1492:         mov     i, esi
                   1493:         mov     r, edi
                   1494:         mov     dN, ecx
                   1495:         sub     ebx, esi                ; Undo our offset
                   1496: 
                   1497: bank_y_output_strips:
                   1498:         lea     edx, [strip]
                   1499:         mov     ecx, pls
                   1500: 
                   1501:         ptrCall <dword ptr pfn>, \
                   1502:                 <edx, ecx, eax>
                   1503: 
                   1504:         mov     esi, ppdev
                   1505:         test    fl, FL_FLIP_V
                   1506:         jz      short bank_y_down
                   1507: 
                   1508: bank_y_up:
                   1509:         mov     edi, strip.ST_pjScreen
                   1510:         sub     edi, [esi].pdev_pvBitmapStart
                   1511:         mov     ecx, [esi].pdev_rcl1WindowClip.yTop
                   1512:         push    ecx
                   1513:         dec     ecx                     ; we want yTop - 1 to be mapped in
                   1514: 
                   1515: ; Map in the next higher bank:
                   1516: 
                   1517:         ptrCall <dword ptr [esi].pdev_pfnBankControl>, \
                   1518:                 <esi, ecx, JustifyBottom>; ebx, esi and edi are preserved
                   1519: 
                   1520:         pop     ecx
                   1521:         sub     ecx, [esi].pdev_rcl1WindowClip.yTop
                   1522:                                         ; ecx = # of scans can do in bank
                   1523: 
                   1524:         add     edi, [esi].pdev_pvBitmapStart
                   1525:         mov     strip.ST_pjScreen, edi
                   1526: 
                   1527:         mov     edx, cPelsAfterThisBank                 ; edx = cPelsAfterBank
                   1528:         lea     eax, [strip.ST_alStrips]                ; eax = plStrip
                   1529:         or      ebx, ebx                                ; ebx = cPels
                   1530:         jge     bank_y_done_partial_strip
                   1531:         jmp     short bank_y_done_switch
                   1532: 
                   1533: bank_y_down:
                   1534:         mov     edi, strip.ST_pjScreen
                   1535:         sub     edi, [esi].pdev_pvBitmapStart
                   1536:         mov     ecx, [esi].pdev_rcl1WindowClip.yBottom
                   1537:         push    ecx
                   1538: 
                   1539: ; Map in the next lower bank:
                   1540: 
                   1541:         ptrCall <dword ptr [esi].pdev_pfnBankControl>, \
                   1542:                 <esi, ecx, JustifyTop>  ; ebx, esi and edi are preserved
                   1543: 
                   1544:         pop     eax
                   1545:         mov     ecx, [esi].pdev_rcl1WindowClip.yBottom
                   1546:         sub     ecx, eax                ; ecx = # scans can do in bank
                   1547: 
                   1548:         add     edi, [esi].pdev_pvBitmapStart
                   1549:         mov     strip.ST_pjScreen,edi
                   1550: 
                   1551:         mov     edx, cPelsAfterThisBank                 ; edx = cPelsAfterBank
                   1552:         lea     eax, [strip.ST_alStrips]                ; eax = plStrip
                   1553:         or      ebx, ebx                                ; ebx = cPels
                   1554:         jge     short bank_y_done_partial_strip
                   1555: 
                   1556: bank_y_done_switch:
                   1557: 
                   1558: ; Handle a single strip stretching over multiple banks:
                   1559: 
                   1560:         test    fl, FL_FLIP_HALF
                   1561:         jz      short bank_y_no_half_flip
                   1562: 
                   1563: ; We now have to adjust for the fact that the strip drawers always leave
                   1564: ; the state ready for the next new strip (e.g., if we're doing vertical
                   1565: ; strips, it advances pjScreen one to the right after drawing each strip).
                   1566: ; But the problem is that since we crossed a bank, we have to continue the
                   1567: ; *old* strip, so we have to undo that advance:
                   1568: 
                   1569: bank_y_half_flip:
                   1570:         inc     strip.ST_pjScreen
                   1571:         jmp     short bank_y_done_bit_adjust
                   1572: 
                   1573: bank_y_no_half_flip:
                   1574:         dec     strip.ST_pjScreen
                   1575: 
                   1576: bank_y_done_bit_adjust:
                   1577:         mov     esi, ebx
                   1578:         neg     esi                             ; esi = # pels left in strip
                   1579: 
                   1580: ; eax = pointer to first strip entry
                   1581: ; ebx = negative esi
                   1582: ; ecx = # of pels we can put down in this window
                   1583: ; edx = # of pels remaining to do in line
                   1584: ; esi = # of pels left in strip
                   1585: 
                   1586: ; We have three special cases to check here:
                   1587: ;
                   1588: ;       1) If the strip spans the entire next window
                   1589: ;       2) This is the last strip in the line
                   1590: ;       3) Neither of the above
                   1591: 
                   1592:         cmp     edx,ecx                         ;if line shorter than bank,
                   1593:         jle     short bank_y_check_if_last_strip;  know strip doesn't span bank
                   1594: 
                   1595:         cmp     esi,ecx                         ;if line spans bank, don't have
                   1596:         jl      short bank_y_continue_strip     ;  to check if last strip
                   1597: 
                   1598: ; If ((# of pels in line > window size) && (# of pels in strip >= window size))
                   1599: ; then the strip spans this bank:
                   1600: 
                   1601:         mov     [eax], ecx
                   1602:         add     eax, 4
                   1603:         add     ebx, ecx
                   1604:         sub     edx, ecx
                   1605:         mov     cPelsAfterThisBank, edx
                   1606:         jmp     bank_y_output_strips
                   1607: 
                   1608: bank_y_check_if_last_strip:
                   1609:         cmp     esi, edx                        ;if strip is shorter than line,
                   1610:         jl      short bank_y_continue_strip     ;  we know this isn't the last
                   1611:                                                 ;  strip
                   1612: 
                   1613: ; Handle case where this is the last strip in the line and it overlaps a bank:
                   1614: 
                   1615:         mov     [eax], edx
                   1616:         add     eax, 4
                   1617:         jmp     very_final_strip
                   1618: 
                   1619: bank_y_continue_strip:
                   1620:         mov     [eax], esi
                   1621:         add     eax, 4
                   1622: 
                   1623: bank_y_done_partial_strip:
                   1624:         add     ebx, edx                ; cPels += cPelsAfterThisBank
                   1625:         sub     edx, ecx                ; cPelsAfterThisBank -= cyWindow
                   1626: 
                   1627:         jle     short bank_y_get_ready
                   1628:         sub     ebx, edx
                   1629: 
                   1630: bank_y_get_ready:
                   1631:         mov     cPelsAfterThisBank, edx
                   1632:         mov     edi, r
                   1633:         mov     edx, d_R
                   1634:         mov     ecx, dN
                   1635:         jmp     done_output_strips
                   1636: 
                   1637: ;---------------------------Private-Routine-----------------------------;
                   1638: ; do_some_styling
                   1639: ;
                   1640: ; Inputs:
                   1641: ;       eax = ptlStart.ptl_y
                   1642: ;       ebx = fl
                   1643: ;       ecx = ptlStart.ptl_x
                   1644: ; Preserves:
                   1645: ;       eax, ebx, ecx
                   1646: ; Output:
                   1647: ;       Exits to done_styling.
                   1648: ; Updates pls->LS_spNext and the strip's ST_psp*, ST_spRemaining, ST_bIsGap.
                   1649: ;-----------------------------------------------------------------------;
                   1650: 
                   1651:         public  do_some_styling
                   1652: do_some_styling:
                   1653:         mov     esi, pls                ; esi = pls
                   1654:         mov     ptlStart.ptl_x, ecx     ; save ecx; reloaded at done_arbitrary
                   1655: 
                   1656:         mov     edi, [esi].LS_spNext    ; spThis
                   1657:         mov     edx, edi
                   1658:         add     edx, cStylePels         ; spNext
                   1659: 
                   1660: do_non_alternate_style:
                   1661: 
                   1662: ; For styles, we don't bother to keep the style position normalized.
                   1663: ; (we do ensure that it's positive, though).  If a figure is over 2
                   1664: ; billion pels long, we'll be a pel off in our style state (oops!).
                   1665: 
                   1666:         and     edx, 7fffffffh          ; clear the sign bit of spNext
                   1667:         mov     [esi].LS_spNext, edx    ; pls->spNext = spNext
                   1668:         mov     ptlStart.ptl_y, eax     ; save eax; reloaded at done_arbitrary
                   1669: 
                   1670:         testb   ebx, FL_FLIP_H
                   1671:         jz      short arbitrary_left_to_right ; flag clear: left-to-right setup
                   1672: 
                   1673:         sub     edx, x0
                   1674:         add     edx, xStart             ; edx = spNext - x0 + xStart
                   1675:         mov     eax, edx
                   1676:         xor     edx, edx                ; edx:eax = unsigned dividend
                   1677:         div     [esi].LS_spTotal        ; eax = quotient, edx = remainder
                   1678: 
                   1679:         neg     edx                     ; edx = -remainder
                   1680:         jge     short continue_right_to_left ; -remainder >= 0: no adjustment
                   1681:         add     edx, [esi].LS_spTotal   ; edx = spTotal - remainder
                   1682:         not     eax                     ; eax = ~quotient
                   1683: 
                   1684: continue_right_to_left:
                   1685:         mov     edi, dword ptr [esi].LS_bStartIsGap
                   1686:         not     edi                     ; edi = ~bStartIsGap
                   1687:         mov     ecx, [esi].LS_aspRtoL   ; ecx = pspStart = pls->aspRtoL
                   1688:         jmp     short compute_arbitrary_stuff
                   1689: 
                   1690: arbitrary_left_to_right:
                   1691:         add     edi, x0
                   1692:         sub     edi, xStart             ; edi = spThis + x0 - xStart
                   1693:         mov     eax, edi
                   1694:         xor     edx, edx                ; edx:eax = unsigned dividend
                   1695:         div     [esi].LS_spTotal        ; eax = quotient, edx = remainder
                   1696:         mov     edi, dword ptr [esi].LS_bStartIsGap
                   1697:         mov     ecx, [esi].LS_aspLtoR   ; ecx = pspStart = pls->aspLtoR
                   1698: 
                   1699: compute_arbitrary_stuff:
                   1700: ;       eax = sp / spTotal
                   1701: ;       ebx = fl
                   1702: ;       ecx = pspStart
                   1703: ;       edx = sp % spTotal
                   1704: ;       esi = pls
                   1705: ;       edi = bIsGap
                   1706: 
                   1707:         and     eax, [esi].LS_cStyle        ; if odd length style and second run
                   1708:         and     al, 1                       ; through style array, flip the
                   1709:         jz      short odd_style_array_done  ; meaning of the elements
                   1710:         not     edi
                   1711: 
                   1712: odd_style_array_done:
                   1713:         mov     eax, [esi].LS_cStyle
                   1714:         mov     strip.ST_pspStart, ecx  ; first entry of the style array
                   1715:         lea     eax, [ecx + eax * 4 - 4] ; eax = pspStart + (cStyle - 1) * 4
                   1716:         mov     strip.ST_pspEnd, eax    ; address of the last 4-byte entry
                   1717: 
                   1718: find_psp:
                   1719:         sub     edx, [ecx]              ; subtract this entry's length
                   1720:         jl      short found_psp         ; went negative: position is in this entry
                   1721:         add     ecx, 4                  ; step to the next entry
                   1722:         jmp     short find_psp
                   1723: 
                   1724: found_psp:
                   1725:         mov     strip.ST_psp, ecx       ; entry containing the style position
                   1726:         neg     edx                     ; edx = amount left in that entry
                   1727:         mov     strip.ST_spRemaining, edx
                   1728: 
                   1729:         sub     ecx, strip.ST_pspStart  ; ecx = byte offset of the entry
                   1730:         test    ecx, 4                      ; size STYLEPOS
                   1731:         jz      short done_arbitrary    ; even entry index: keep edi as is
                   1732:         not     edi                     ; odd entry index: invert bIsGap
                   1733: 
                   1734: done_arbitrary:
                   1735:         mov     dword ptr strip.ST_bIsGap, edi
                   1736:         mov     eax, ptlStart.ptl_y     ; restore eax
                   1737:         mov     ecx, ptlStart.ptl_x     ; restore ecx
                   1738:         jmp     done_styling
                   1739: 
                   1740: ;---------------------------Private-Routine-----------------------------;
                   1741: ; do_some_clipping
                   1742: ;
                   1743: ; Inputs:
                   1744: ;       eax = garbage
                   1745: ;       ebx = fl
                   1746: ;       ecx = x0
                   1747: ;       edx = garbage
                   1748: ;       esi = x1
                   1749: ;       edi = garbage
                   1750: ;
                   1751: ; Decides whether to do simple or complex clipping, based on the
                   1752: ; FL_COMPLEX_CLIP flag in fl.
                   1753: ;-----------------------------------------------------------------------;
                   1754: 
                   1755:         align 4
                   1756: 
                   1757:         public  do_some_clipping
                   1758: do_some_clipping:
                   1759:         testb   ebx, FL_COMPLEX_CLIP
                   1760:         jnz     initialize_complex_clipping ; flag clear: fall into simple_clipping
                   1761: 
                   1762: ;-----------------------------------------------------------------------;
                   1763: ; simple_clipping
                   1764: ;
                   1765: ; Inputs:
                   1766: ;       ebx = fl
                   1767: ;       ecx = x0
                   1768: ;       esi = x1
                   1769: ; Output:
                   1770: ;       ebx = fl
                   1771: ;       ecx = new x0 (stack variable updated too)
                   1772: ;       esi = new x1
                   1773: ;       y0 stack variable updated
                   1774: ; Uses:
                   1775: ;       All registers
                   1776: ; Exits:
                   1777: ;       to done_clipping, or to next_line if the line is totally clipped
                   1778: ;
                   1779: ; This routine handles clipping the line to the clip rectangle (it's
                   1780: ; faster to handle this case in the driver than to call the engine to
                   1781: ; clip for us).
                   1782: ;
                   1783: ; Fractional end-point lines complicate our lives a bit when doing
                   1784: ; clipping:
                   1785: ;
                   1786: ; 1) For styling, we must know the unclipped line's length in pels, so
                   1787: ;    that we can correctly update the styling state when the line is
                   1788: ;    clipped.  For this reason, I do clipping after doing the hard work
                   1789: ;    of figuring out which pixels are at the ends of the line (this is
                   1790: ;    wasted work if the line is not styled and is completely clipped,
                   1791: ;    but I think it's simpler this way).  Another reason is that we'll
                   1792: ;    have calculated eqGamma already, which we use for the intercept
                   1793: ;    calculations.
                   1794: ;
                   1795: ;    With the assumption that most lines will not be completely clipped
                   1796: ;    away, this strategy isn't too painful.
                   1797: ;
                   1798: ; 2) x0, y0 are not necessarily zero, where (x0, y0) is the start pel of
                   1799: ;    the line.
                   1800: ;
                   1801: ; 3) We know x0, y0 and x1, but not y1.  We haven't needed to calculate
                   1802: ;    y1 until now.  We'll need the actual value, and not an upper bound
                   1803: ;    like y1 = LFLOOR(dM) + 2 because we have to be careful when
                   1804: ;    calculating x(y) that y0 <= y <= y1, otherwise we can cause an
                   1805: ;    overflow on the divide (which, needless to say, is bad).
                   1806: ;
                   1807: ;-----------------------------------------------------------------------;
                   1808: 
                   1809:         public  simple_clipping
                   1810: simple_clipping:
                   1811:         mov     edi, prclClip           ; get pointer to normalized clip rect
                   1812:         and     ebx, FL_RECTLCLIP_MASK  ;   (it's lower-right exclusive)
                   1813: 
                   1814:         .errnz  (FL_RECTLCLIP_SHIFT - 2); ((ebx AND FL_RECTLCLIP_MASK) shr
                   1815:         .errnz  (size RECTL) - 16       ;   FL_RECTLCLIP_SHIFT) is our index
                   1816:         lea     edi, [edi + ebx*4]      ;   into the array of rectangles
                   1817: 
                   1818:         mov     edx, [edi].xRight       ; load the rect coordinates
                   1819:         mov     eax, [edi].xLeft        ; eax = xLeft
                   1820:         mov     ebx, [edi].yBottom      ; ebx = yBottom (fl reloaded later)
                   1821:         mov     edi, [edi].yTop         ; edi = yTop (rect pointer is gone)
                   1822: 
                   1823: ; Translate to our origin and do some quick completely clipped tests:
                   1824: 
                   1825:         sub     edx, x                  ; edx = xRight - x
                   1826:         cmp     ecx, edx
                   1827:         jge     totally_clipped         ; totally clipped if x0 >= xRight
                   1828: 
                   1829:         sub     eax, x                  ; eax = xLeft - x
                   1830:         cmp     esi, eax
                   1831:         jl      totally_clipped         ; totally clipped if x1 < xLeft
                   1832: 
                   1833:         sub     ebx, y                  ; ebx = yBottom - y
                   1834:         cmp     y0, ebx
                   1835:         jge     totally_clipped         ; totally clipped if y0 >= yBottom
                   1836: 
                   1837:         sub     edi, y                  ; edi = yTop - y
                   1838: 
                   1839: ; Save some state:
                   1840: 
                   1841:         mov     xClipRight, edx         ; edx is about to be used by mul/div
                   1842:         mov     xClipLeft, eax          ; eax is about to be used by mul/div
                   1843: 
                   1844:         cmp     esi, edx                ; if (x1 >= xRight) x1 = xRight - 1
                   1845:         jl      short calculate_y1
                   1846:         lea     esi, [edx - 1]
                   1847: 
                   1848: calculate_y1:
                   1849:         mov     eax, esi                ; y1 = (x1 * dN + eqGamma) / dM
                   1850:         mul     dN                      ; edx:eax = x1 * dN
                   1851:         add     eax, eqGamma_lo         ; 64-bit add of eqGamma:
                   1852:         adc     edx, eqGamma_hi         ;   carry propagates into edx
                   1853:         div     dM                      ; eax = y1
                   1854: 
                   1855:         cmp     edi, eax                ; if (yTop > y1) clipped
                   1856:         jg      short totally_clipped
                   1857: 
                   1858:         cmp     ebx, eax                ; if (yBottom > y1) know x1
                   1859:         jg      short x1_computed
                   1860: 
                   1861:         mov     eax, ebx                ; x1 = (yBottom * dM + eqBeta) / dN
                   1862:         mul     dM                      ; edx:eax = yBottom * dM
                   1863:         stc                             ; CF = 1 so the sbb pair subtracts
                   1864:         sbb     eax, eqGamma_lo         ;   eqGamma + 1 from edx:eax
                   1865:         sbb     edx, eqGamma_hi
                   1866:         div     dN
                   1867:         mov     esi, eax                ; esi = new x1
                   1868: 
                   1869: ; At this point, we've taken care of calculating the intercepts with the
                   1870: ; right and bottom edges.  Now we work on the left and top edges:
                   1871: 
                   1872: x1_computed:
                   1873:         mov     edx, y0                 ; edx = y0, compared with yTop below
                   1874: 
                   1875:         mov     eax, xClipLeft          ; don't have to compute y intercept
                   1876:         cmp     eax, ecx                ;   at left edge if line starts to
                   1877:         jle     short top_intercept     ;   right of left edge
                   1878: 
                   1879:         mov     ecx, eax                ; x0 = xLeft
                   1880:         mul     dN                      ; y0 = (xLeft * dN + eqGamma) / dM
                   1881:         add     eax, eqGamma_lo         ; 64-bit add of eqGamma:
                   1882:         adc     edx, eqGamma_hi         ;   carry propagates into edx
                   1883:         div     dM                      ; eax = y at the left edge
                   1884: 
                   1885:         cmp     ebx, eax                ; if (yBottom <= y0) clipped
                   1886:         jle     short totally_clipped
                   1887: 
                   1888:         mov     edx, eax                ; edx = new y0
                   1889:         mov     y0, eax                 ; update the stack copy as well
                   1890: 
                   1891: top_intercept:
                   1892:         mov     ebx, fl                 ; get ready to leave
                   1893:         mov     x0, ecx                 ; store x0 (possibly moved to xLeft)
                   1894: 
                   1895:         cmp     edi, edx                ; if (yTop <= y0) done clipping
                   1896:         jle     done_clipping
                   1897: 
                   1898:         mov     eax, edi                ; x0 = (yTop * dM + eqBeta) / dN + 1
                   1899:         mul     dM                      ; edx:eax = yTop * dM
                   1900:         stc                             ; CF = 1 so the sbb pair subtracts
                   1901:         sbb     eax, eqGamma_lo         ;   eqGamma + 1 from edx:eax
                   1902:         sbb     edx, eqGamma_hi
                   1903:         div     dN
                   1904:         lea     ecx, [eax + 1]          ; ecx = quotient + 1 = new x0
                   1905: 
                   1906:         cmp     xClipRight, ecx         ; if (xRight <= x0) clipped
                   1907:         jle     short totally_clipped
                   1908: 
                   1909:         mov     y0, edi                 ; y0 = yTop
                   1910:         mov     x0, ecx                 ; store the new x0
                   1911:         jmp     done_clipping           ; all done!
                   1912: 
                   1913: totally_clipped:
                   1914: 
                   1915: ; The line is completely clipped.  See if we have to update our style state:
                   1916: 
                   1917:         mov     ebx, fl                 ; reload fl (ebx held yBottom above)
                   1918:         testb   ebx, FL_STYLED
                   1919:         jz      next_line               ; not styled: no style state to update
                   1920: 
                   1921: ; Adjust our style state:
                   1922: 
                   1923:         mov     esi, pls
                   1924:         mov     eax, [esi].LS_spNext
                   1925:         add     eax, cStylePels         ; eax = spNext + cStylePels
                   1926:         mov     [esi].LS_spNext, eax
                   1927: 
                   1928:         cmp     eax, [esi].LS_spTotal2
                   1929:         jb      next_line               ; below spTotal2 (unsigned): done
                   1930: 
                   1931: ; Have to normalize first:
                   1932: 
                   1933:         xor     edx, edx                ; edx:eax = spNext for unsigned divide
                   1934:         div     [esi].LS_spTotal2       ; edx = spNext mod spTotal2
                   1935:         mov     [esi].LS_spNext, edx
                   1936: 
                   1937:         jmp     next_line
                   1938: 
                   1939: ;-----------------------------------------------------------------------;
                   1940: 
                   1941: initialize_complex_clipping:
                   1942:         mov     eax, dN                 ; save a copy of original dN
                   1943:         mov     dN_Original, eax        ; (restored into dN on every pass below)
                   1944: 
                   1945: ;---------------------------Private-Routine-----------------------------;
                   1946: ; continue_complex_clipping
                   1947: ;
                   1948: ; Inputs:
                   1949: ;       ebx = fl
                   1950: ; Output:
                   1951: ;       ebx = fl
                   1952: ;       ecx = x0
                   1953: ;       esi = x1
                   1954: ; Uses:
                   1955: ;       All registers.
                   1956: ; Exits:
                   1957: ;       to done_clipping
                   1958: ;
                   1959: ; This routine handles the necessary initialization for the next
                   1960: ; run in the CLIPLINE structure.
                   1961: ;
                   1962: ; NOTE: This routine is jumped to from two places!
                   1963: ;-----------------------------------------------------------------------;
                   1964: 
                   1965:         public  continue_complex_clipping
                   1966: continue_complex_clipping:
                   1967:         mov     edi, prun               ; edi = prun (RUN to process)
                   1968:         mov     ecx, xStart
                   1969:         testb   ebx, FL_FLIP_H
                   1970:         jz      short complex_left_to_right ; flag clear: left-to-right case
                   1971: 
                   1972: complex_right_to_left:
                   1973: 
                   1974: ; Figure out x0 and x1 for right-to-left lines:
                   1975: 
                   1976:         add     ecx, cStylePels
                   1977:         dec     ecx
                   1978:         mov     esi, ecx                ; esi = ecx = xStart + cStylePels - 1
                   1979:         sub     ecx, [edi].RUN_iStop    ; New x0
                   1980:         sub     esi, [edi].RUN_iStart   ; New x1
                   1981:         jmp     short complex_reset_variables
                   1982: 
                   1983: complex_left_to_right:
                   1984: 
                   1985: ; Figure out x0 and x1 for left-to-right lines:
                   1986: 
                   1987:         mov     esi, ecx                ; esi = ecx = xStart
                   1988:         add     ecx, [edi].RUN_iStart   ; New x0
                   1989:         add     esi, [edi].RUN_iStop    ; New x1
                   1990: 
                   1991: complex_reset_variables:
                   1992:         mov     x0, ecx                 ; store the new x0
                   1993: 
                   1994: ; The half flip mucks with some of our variables, and we have to reset
                   1995: ; them every pass.  We would have to reset eqGamma too, but it never
                   1996: ; got saved to memory in its modified form.
                   1997: 
                   1998:         add     edi, size RUN
                   1999:         mov     prun, edi               ; Increment run pointer for next time
                   2000: 
                   2001:         mov     edi, pls
                   2002:         mov     eax, [edi].LS_spComplex
                   2003:         mov     [edi].LS_spNext, eax    ; pls->spNext = pls->spComplex
                   2004: 
                   2005:         mov     eax, dN_Original        ; dN = dN_Original
                   2006:         mov     dN, eax
                   2007: 
                   2008:         mul     ecx                     ; edx:eax = dN * x0
                   2009:         add     eax, eqGamma_lo
                   2010:         adc     edx, eqGamma_hi         ; [edx:eax] = dN*x0 + eqGamma
                   2011: 
                   2012:         div     dM                      ; eax = (dN*x0 + eqGamma) / dM
                   2013:         mov     y0, eax                 ; store the new y0
                   2014:         jmp     done_clipping
                   2015: 
                   2016: endProc bLines
                   2017: 
                   2018:         end
unix.superglobalmegacorp.com
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.