ntddk/src/video/displays/vga/i386/vgalines.asm - annotate

Return to vgalines.asm CVS log
Up to [WindowsNT SDKs] / ntddk / src / video / displays / vga / i386
Annotation of ntddk/src/video/displays/vga/i386/vgalines.asm, revision 1.1.1.1

1.1       root        1: ;---------------------------Module-Header------------------------------;
                      2: ; Module Name: lines.asm
                      3: ;
                      4: ; Draws a set of connected polylines.  Initialization for the device
                      5: ; or bitmap has already been done in the stroke routine.  Solid and
                      6: ; styled lines are handled for both the device and bitmaps.  Banking
                      7: ; for the display is handled.
                      8: ;
                      9: ; The code is different depending on whether we are drawing solid
                     10: ; lines, styled lines with common styles, or lines with completely
                     11: ; arbitrary styles.
                     12: ;
                     13: ; There are sixteen raster operations (sets of logical operations)
                     14: ; performed on the data written out.  When writing to the VGA there are
                     15: ; four of these operations which take two passes of VGA memory.  In
                     16: ; each of these cases the first pass inverts the necessary bits in the
                     17: ; necessary planes.  The second pass then performs the rest of the
                     18: ; raster operation.  The other twelve raster operations can be done in
                     19: ; one pass of VGA memory.  All raster operations are done in one pass of
                     20: ; memory for bitmaps.  Depending on the raster operation and the color
                     21: ; of the pen, it is easily determined whether we set bits to zeros, set
                     22: ; bits to ones, invert bits or do nothing.  Bitmaps are written to one
                     23: ; plane at a time.
                     24: ;
                     25: ; Lines are drawn from left to right.  So if a line moves from right
                     26: ; to left, the endpoints are swapped and the line is drawn from left to
                     27: ; right.
                     28: ;
                     29: ; Copyright (c) 1992 Microsoft Corporation
                     30: ;-----------------------------------------------------------------------;
                     31: 
                     32:         .386                            ; enable 80386 instructions and 32-bit registers
                     33: 
                     34:         .model  small,c                 ; small memory model, C language naming/calling defaults
                     35: 
                     36:         assume cs:FLAT,ds:FLAT,es:FLAT,ss:FLAT  ; these segment registers address the flat space
                     37:         assume fs:nothing,gs:nothing            ; no assumption is made about fs and gs
                     38: 
                     39:         .xlist                          ; suppress listing output for the include files
                     40:         include stdcall.inc             ;calling convention cmacros
                     41:         include i386\egavga.inc         ; NOTE(review): presumably EGA/VGA definitions - not visible here
                     42:         include i386\strucs.inc         ; NOTE(review): presumably shared structure layouts - confirm
                     43:         include i386\lines.inc          ; NOTE(review): presumably the FL_ flag constants used below - confirm
                     44:         .list                           ; resume listing output
                     45: 
                     46:         .data
                     47: 
                     48:         public gaflRoundTable           ; exported so other modules can reference the table
                     49: gaflRoundTable       label  dword       ; eight dword entries of rounding flags, one per flip combination
                     50:         dd      FL_H_ROUND_DOWN + FL_V_ROUND_DOWN       ; no flips
                     51:         dd      FL_H_ROUND_DOWN + FL_V_ROUND_DOWN       ; D flip
                     52:         dd      FL_H_ROUND_DOWN                         ; V flip
                     53:         dd      FL_V_ROUND_DOWN                         ; D & V flip
                     54:         dd      FL_V_ROUND_DOWN                         ; slope one
                     55:         dd      0baadf00dh                              ; filler; by position this is slope one & D flip - presumably never indexed, TODO confirm
                     56:         dd      FL_H_ROUND_DOWN                         ; slope one & V flip
                     57:         dd      0baadf00dh                              ; filler; by position this is slope one & D & V flip - presumably never indexed, TODO confirm
                     58: 
                     59:         .code
                     60: 
                     61: _TEXT$03   SEGMENT DWORD USE32 PUBLIC 'CODE'
                     62:            ASSUME  CS:FLAT, DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING
                     63: 
                     64: ;--------------------------------Macro----------------------------------;
                     65: ; testb ebx, <mask>
                     66: ;
                     67: ; Substitutes a byte-sized test (of bl or bh) if the mask is entirely in
                     68: ; the lo-byte or hi-byte (thus saving 3 bytes of code space).
                     69: ;
                     70: ;-----------------------------------------------------------------------;
                     71: 
                     72: TESTB   macro   targ,mask,thirdarg
                     73:         local   mask2,delta             ; assembly-time scratch symbols, unique to each expansion
                     74: 
                     75: ifnb <thirdarg>                         ; a non-blank third argument means the bit pattern was split in two
                     76:         .err    TESTB mask must be enclosed in brackets!
                     77: endif
                     78: 
                     79:         delta = 0                       ; 0 selects the low byte register, 1 the high byte register
                     80:         mask2 = mask                    ; working copy, reduced to an 8-bit pattern below
                     81: 
                     82:         if mask2 AND 0ffff0000h
                     83:             test targ,mask                      ; If bit set in hi-word,
                     84:             exitm                               ; test entire dword
                     85:         endif
                     86: 
                     87:         if mask2 AND 0ff00h
                     88:             if mask2 AND 0ffh                   ; If bit set in lo-byte and
                     89:                 test targ,mask                  ; hi-byte, test entire dword
                     90:                 exitm
                     91:             endif
                     92: 
                     93:             mask2 = mask2 SHR 8                 ; pattern lies wholly in bits 8-15: move it down to bits 0-7
                     94:             delta = 1                           ; and remember that the high byte register must be tested
                     95:         endif
                     96: 
                     97: ifidni <targ>,<EBX>                     ; case-insensitive match on the register name
                     98:         if delta
                     99:             test bh,mask2                       ; pattern was in bits 8-15 of ebx
                    100:         else
                    101:             test bl,mask2                       ; pattern was in bits 0-7 of ebx (or was zero)
                    102:         endif
                    103:         exitm                                   ; byte form emitted; skip the error below
                    104: endif
                    105: 
                    106:         .err    Too bad TESTB doesn't support targets other than ebx!
                    107: endm
                    108: 
                    109: ;---------------------------Public-Routine------------------------------;
                    110: ; bLines(pdsurf, pptfxFirst, pptfxBuf, prun, cptfx, pls,
                    111: ;        prclClip, apfn[], flStart)
                    112: ;
                    113: ; Do all the DDA calculations for lines.
                    114: ;
                    115: ; Doing Lines Right
                    116: ; -----------------
                    117: ;
                    118: ; In NT, all lines are given to the device driver in fractional
                    119: ; coordinates, in a 28.4 fixed point format.  The lower 4 bits are
                    120: ; fractional for sub-pixel positioning.
                    121: ;
                    122: ; Note that you CANNOT! just round the coordinates to integers
                    123: ; and pass the results to your favorite integer Bresenham routine!!
                    124: ; (Unless, of course, you have such a high resolution device that
                    125: ; nobody will notice -- not likely for a display device.)  The
                    126: ; fractions give a more accurate rendering of the line -- this is
                    127: ; important for things like our Bezier curves, which would have 'kinks'
                    128: ; if the points in its polyline approximation were rounded to integers.
                    129: ;
                    130: ; Unfortunately, for fractional lines there is more setup work to do
                    131: ; a DDA than for integer lines.  However, the main loop is exactly
                    132: ; the same (and can be done entirely with 32 bit math).
                    133: ;
                    134: ; If You've Got Hardware That Does Bresenham
                    135: ; ------------------------------------------
                    136: ;
                    137: ; A lot of hardware limits DDA error terms to 'n' bits.  With fractional
                    138: ; coordinates, 4 bits are given to the fractional part, letting
                    139: ; you draw in hardware only those lines that lie entirely in a 2^(n-4)
                    140: ; by 2^(n-4) pixel space.
                    141: ;
                    142: ; And you still have to correctly draw those lines with coordinates
                    143: ; outside that space!  Remember that the screen is only a viewport
                    144: ; onto a 28.4 by 28.4 space -- if any part of the line is visible
                    145: ; you MUST render it precisely, regardless of where the end points lie.
                    146: ; So even if you do it in software, somewhere you'll have to have a
                    147: ; 32 bit DDA routine.
                    148: ;
                    149: ; Our Implementation
                    150: ; ------------------
                    151: ;
                    152: ; We employ a run length slice algorithm: our DDA calculates the
                    153: ; number of pixels that are in each row (or 'strip') of pixels.
                    154: ;
                    155: ; We've separated the running of the DDA and the drawing of pixels:
                    156: ; we run the DDA for several iterations and store the results in
                    157: ; a 'strip' buffer (which holds the lengths of consecutive pixel rows of
                    158: ; the line), then we crank up a 'strip drawer' that will draw all the
                    159: ; strips in the buffer.
                    160: ;
                    161: ; We also employ a 'half-flip' to reduce the number of strip
                    162: ; iterations we need to do in the DDA and strip drawing loops: when a
                    163: ; (normalized) line's slope is more than 1/2, we do a final flip
                    164: ; about the line y = (1/2)x.  So now, instead of each strip being
                    165: ; consecutive horizontal or vertical pixel rows, each strip is composed
                    166: ; of those pixels aligned in 45 degree rows.  So a line like (0, 0) to
                    167: ; (128, 128) would generate only one strip.
                    168: ;
                    169: ; We also always draw only left-to-right.
                    170: ;
                    171: ; Styled lines may have arbitrary style patterns.  We specially
                    172: ; optimize the default patterns (and call them 'masked' styles).
                    173: ;
                    174: ; The DDA Derivation
                    175: ; ------------------
                    176: ;
                    177: ; Here is how I like to think of the DDA calculation.
                    178: ;
                    179: ; We employ Knuth's "diamond rule": rendering a one-pixel-wide line
                    180: ; can be thought of as dragging a one-pixel-wide by one-pixel-high
                    181: ; diamond along the true line.  Pixel centers lie on the integer
                    182: ; coordinates, and so we light any pixel whose center gets covered
                    183: ; by the "drag" region (John D. Hobby, Journal of the Association
                    184: ; for Computing Machinery, Vol. 36, No. 2, April 1989, pp. 209-229).
                    185: ;
                    186: ; We must define which pixel gets lit when the true line falls
                    187: ; exactly half-way between two pixels.  In this case, we follow
                    188: ; the rule: when two pels are equidistant, the upper or left pel
                    189: ; is illuminated, unless the slope is exactly one, in which case
                    190: ; the upper or right pel is illuminated.  (So we make the edges
                    191: ; of the diamond exclusive, except for the top and left vertices,
                    192: ; which are inclusive, unless we have slope one.)
                    193: ;
                    194: ; This metric decides what pixels should be on any line BEFORE it is
                    195: ; flipped around for our calculation.  Having a consistent metric
                    196: ; this way will let our lines blend nicely with our curves.  The
                    197: ; metric also dictates that we will never have one pixel turned on
                    198: ; directly above another that's turned on.  We will also never have
                    199: ; a gap; i.e., there will be exactly one pixel turned on for each
                    200: ; column between the start and end points.  All that remains to be
                    201: ; done is to decide how many pixels should be turned on for each row.
                    202: ;
                    203: ; So lines we draw will consist of varying numbers of pixels on
                    204: ; successive rows, for example:
                    205: ;
                    206: ;       ******
                    207: ;             *****
                    208: ;                  ******
                    209: ;                        *****
                    210: ;
                    211: ; We'll call each set of pixels on a row a "strip".
                    212: ;
                    213: ; (Please remember that our coordinate space has the origin as the
                    214: ; upper left pixel on the screen; positive y is down and positive x
                    215: ; is right.)
                    216: ;
                    217: ; Device coordinates are specified as fixed point 28.4 numbers,
                    218: ; where the first 28 bits are the integer coordinate, and the last
                    219: ; 4 bits are the fraction.  So coordinates may be thought of as
                    220: ; having the form (x, y) = (M/F, N/F) where F is the constant scaling
                    221: ; factor F = 2^4 = 16, and M and N are 32 bit integers.
                    222: ;
                    223: ; Consider the line from (M0/F, N0/F) to (M1/F, N1/F) which runs
                    224: ; left-to-right and whose slope is in the first octant, and let
                    225: ; dM = M1 - M0 and dN = N1 - N0.  Then dM >= 0, dN >= 0 and dM >= dN.
                    226: ;
                    227: ; Since the slope of the line is less than 1, the edges of the
                    228: ; drag region are created by the top and bottom vertices of the
                    229: ; diamond.  At any given pixel row y of the line, we light those
                    230: ; pixels whose centers are between the left and right edges.
                    231: ;
                    232: ; Let mL(n) denote the line representing the left edge of the drag
                    233: ; region.  On pixel row j, the column of the first pixel to be
                    234: ; lit is
                    235: ;
                    236: ;       iL(j) = ceiling( mL(j * F) / F)
                    237: ;
                    238: ; Since the line's slope is less than one:
                    239: ;
                    240: ;       iL(j) = ceiling( mL([j + 1/2] F) / F )
                    241: ;
                    242: ; Recall the formula for our line:
                    243: ;
                    244: ;       n(m) = (dN / dM) (m - M0) + N0
                    245: ;
                    246: ;       m(n) = (dM / dN) (n - N0) + M0
                    247: ;
                    248: ; Since the line's slope is less than one, the line representing
                    249: ; the left edge of the drag region is the original line offset
                    250: ; by 1/2 pixel in the y direction:
                    251: ;
                    252: ;       mL(n) = (dM / dN) (n - F/2 - N0) + M0
                    253: ;
                    254: ; From this we can figure out the column of the first pixel that
                    255: ; will be lit on row j, being careful of rounding (if the left
                    256: ; edge lands exactly on an integer point, the pixel at that
                    257: ; point is not lit because of our rounding convention):
                    258: ;
                    259: ;       iL(j) = floor( mL(j F) / F ) + 1
                    260: ;
                    261: ;             = floor( ((dM / dN) (j F - F/2 - N0) + M0) / F ) + 1
                    262: ;
                    263: ;             = floor( (F dM j - F/2 dM - N0 dM + dN M0) / F dN ) + 1
                    264: ;
                    265: ;                      F dM j - [ dM (N0 + F/2) - dN M0 ]
                    266: ;             = floor( ---------------------------------- ) + 1
                    267: ;                                   F dN
                    268: ;
                    269: ;                      dM j - [ dM (N0 + F/2) - dN M0 ] / F
                    270: ;             = floor( ------------------------------------ ) + 1       (1)
                    271: ;                                     dN
                    272: ;
                    273: ;             = floor( (dM j + alpha) / dN ) + 1
                    274: ;
                    275: ; where
                    276: ;
                    277: ;       alpha = - [ dM (N0 + F/2) - dN M0 ] / F
                    278: ;
                    279: ; We use equation (1) to calculate the DDA: there are iL(j+1) - iL(j)
                    280: ; pixels in row j.  Because we are always calculating iL(j) for
                    281: ; integer quantities of j, we note that the only fractional term
                    282: ; is constant, and so we can 'throw away' the fractional bits of
                    283: ; alpha:
                    284: ;
                    285: ;       beta = floor( - [ dM (N0 + F/2) - dN M0 ] / F )                 (2)
                    286: ;
                    287: ; so
                    288: ;
                    289: ;       iL(j) = floor( (dM j + beta) / dN ) + 1                         (3)
                    290: ;
                    291: ; for integers j.
                    292: ;
                    293: ; Note if iR(j) is the line's rightmost pixel on row j, that
                    294: ; iR(j) = iL(j + 1) - 1.
                    295: ;
                    296: ; Similarly, rewriting equation (1) as a function of column i,
                    297: ; we can determine, given column i, on which pixel row j is the line
                    298: ; lit:
                    299: ;
                    300: ;                       dN i + [ dM (N0 + F/2) - dN M0 ] / F
                    301: ;       j(i) = ceiling( ------------------------------------ ) - 1
                    302: ;                                       dM
                    303: ;
                    304: ; Floors are easier to compute, so we can rewrite this:
                    305: ;
                    306: ;                     dN i + [ dM (N0 + F/2) - dN M0 ] / F + dM - 1/F
                    307: ;       j(i) = floor( ----------------------------------------------- ) - 1
                    308: ;                                       dM
                    309: ;
                    310: ;                     dN i + [ dM (N0 + F/2) - dN M0 ] / F + dM - 1/F - dM
                    311: ;            = floor( ---------------------------------------------------- )
                    312: ;                                       dM
                    313: ;
                    314: ;                     dN i + [ dM (N0 + F/2) - dN M0 - 1 ] / F
                    315: ;            = floor( ---------------------------------------- )
                    316: ;                                       dM
                    317: ;
                    318: ; We can once again wave our hands and throw away the fractional bits
                    319: ; of the remainder term:
                    320: ;
                    321: ;       j(i) = floor( (dN i + gamma) / dM )                             (4)
                    322: ;
                    323: ; where
                    324: ;
                    325: ;       gamma = floor( [ dM (N0 + F/2) - dN M0 - 1 ] / F )              (5)
                    326: ;
                    327: ; We now note that
                    328: ;
                    329: ;       beta = -gamma - 1 = ~gamma                                      (6)
                    330: ;
                    331: ; To draw the pixels of the line, we could evaluate (3) on every scan
                    332: ; line to determine where the strip starts.  Of course, we don't want
                    333: ; to do that because that would involve a multiply and divide for every
                    334: ; scan.  So we do everything incrementally.
                    335: ;
                    336: ; We would like to easily compute c , the number of pixels on scan j:
                    337: ;                                  j
                    338: ;
                    339: ;    c  = iL(j + 1) - iL(j)
                    340: ;     j
                    341: ;
                    342: ;       = floor((dM (j + 1) + beta) / dN) - floor((dM j + beta) / dN)   (7)
                    343: ;
                    344: ; This may be rewritten as
                    345: ;
                    346: ;    c  = floor(i    + r    / dN) - floor(i  + r  / dN)                 (8)
                    347: ;     j          j+1    j+1                j    j
                    348: ;
                    349: ; where i , i    are integers and r  < dN, r    < dN.
                    350: ;        j   j+1                   j        j+1
                    351: ;
                    352: ; Rewriting (7) again:
                    353: ;
                    354: ;    c  = floor(i  + r  / dN + dM / dN) - floor(i  + r  / dN)
                    355: ;     j          j    j                          j    j
                    356: ;
                    357: ;
                    358: ;       = floor((r  + dM) / dN) - floor(r  / dN)
                    359: ;                 j                      j
                    360: ;
                    361: ; This may be rewritten as
                    362: ;
                    363: ;    c  = dI + floor((r  + dR) / dN) - floor(r  / dN)
                    364: ;     j                j                      j
                    365: ;
                    366: ; where dI + dR / dN = dM / dN, dI is an integer and dR < dN.
                    367: ;
                    368: ; r  is the remainder (or "error") term in the DDA loop: r  / dN
                    369: ;  j                                                      j
                    370: ; is the exact fraction of a pixel at which the strip ends.  To go
                    371: ; on to the next scan and compute c    we need to know r   .
                    372: ;                                  j+1                  j+1
                    373: ;
                    374: ; So in the main loop of the DDA:
                    375: ;
                    376: ;    c  = dI + floor((r  + dR) / dN) and r    = (r  + dR) % dN
                    377: ;     j                j                  j+1     j
                    378: ;
                    379: ; and we know r  < dN, r    < dN, and dR < dN.
                    380: ;              j        j+1
                    381: ;
                    382: ; We have derived the DDA only for lines in the first octant; to
                    383: ; handle other octants we do the common trick of flipping the line
                    384: ; to the first octant by first making the line left-to-right by
                    385: ; exchanging the end-points, then flipping about the lines y = 0 and
                    386: ; y = x, as necessary.  We must record the transformations so we can
                    387: ; undo them later.
                    388: ;
                    389: ; We must also be careful of how the flips affect our rounding.  If
                    390: ; to get the line to the first octant we flipped about y = 0, we now
                    391: ; have to be careful to round a y value of 1/2 up instead of down as
                    392: ; we would for a line originally in the first octant (recall that
                    393: ; "In the case where two pels are equidistant, the upper or left
                    394: ; pel is illuminated...").
                    395: ;
                    396: ; To account for this rounding when running the DDA, we shift the line
                    397: ; (or not) in the y direction by the smallest amount possible.  That
                    398: ; takes care of rounding for the DDA, but we still have to be careful
                    399: ; about the rounding when determining the first and last pixels to be
                    400: ; lit in the line.
                    401: ;
                    402: ; Determining The First And Last Pixels In The Line
                    403: ; -------------------------------------------------
                    404: ;
                    405: ; Fractional coordinates also make it harder to determine which pixels
                    406: ; will be the first and last ones in the line.  We've already taken
                    407: ; the fractional coordinates into account in calculating the DDA, but
                    408: ; the DDA cannot tell us which are the end pixels because it is quite
                    409: ; happy to calculate pixels on the line from minus infinity to positive
                    410: ; infinity.
                    411: ;
                    412: ; The diamond rule determines the start and end pixels.  (Recall that
                    413: ; the sides are exclusive except for the left and top vertices.)
                    414: ; This convention can be thought of in another way: there are diamonds
                    415: ; around the pixels, and wherever the true line crosses a diamond,
                    416: ; that pel is illuminated.
                    417: ;
                    418: ; Consider a line where we've done the flips to the first octant, and the
                    419: ; floor of the start coordinates is the origin:
                    420: ;
                    421: ;        +-----------------------> +x
                    422: ;        |
                    423: ;        | 0                     1
                    424: ;        |     0123456789abcdef
                    425: ;        |
                    426: ;        |   0 00000000?1111111
                    427: ;        |   1 00000000 1111111
                    428: ;        |   2 0000000   111111
                    429: ;        |   3 000000     11111
                    430: ;        |   4 00000    ** 1111
                    431: ;        |   5 0000       ****1
                    432: ;        |   6 000           1***
                    433: ;        |   7 00             1  ****
                    434: ;        |   8 ?                     ***
                    435: ;        |   9 22             3         ****
                    436: ;        |   a 222           33             ***
                    437: ;        |   b 2222         333                ****
                    438: ;        |   c 22222       3333                    **
                    439: ;        |   d 222222     33333
                    440: ;        |   e 2222222   333333
                    441: ;        |   f 22222222 3333333
                    442: ;        |
                    443: ;        | 2                     3
                    444: ;        v
                    445: ;        +y
                    446: ;
                    447: ; If the start of the line lands on the diamond around pixel 0 (shown by
                    448: ; the '0' region here), pixel 0 is the first pel in the line.  The same
                    449: ; is true for the other pels.
                    450: ;
                    451: ; A little more work has to be done if the line starts in the
                    452: ; 'nether-land' between the diamonds (as illustrated by the '*' line):
                    453: ; the first pel lit is the first diamond crossed by the line (pixel 1 in
                    454: ; our example).  This calculation is determined by the DDA or slope of
                    455: ; the line.
                    456: ;
                    457: ; If the line starts exactly half way between two adjacent pixels
                    458: ; (denoted here by the '?' spots), the first pixel is determined by our
                    459: ; round-down convention (and is dependent on the flips done to
                    460: ; normalize the line).
                    461: ;
                    462: ; Last Pel Exclusive
                    463: ; ------------------
                    464: ;
                    465: ; To eliminate repeatedly lit pels between continuous connected lines,
                    466: ; we employ a last-pel exclusive convention: if the line ends exactly on
                    467: ; the diamond around a pel, that pel is not lit.  (This eliminates the
                    468: ; checks we had in the old code to see if we were re-lighting pels.)
                    469: ;
                    470: ; The Half Flip
                    471: ; -------------
                    472: ;
                    473: ; To make our run length algorithm more efficient, we employ a "half
                    474: ; flip".  If after normalizing to the first octant, the slope is more
                    475: ; than 1/2, we subtract the y coordinate from the x coordinate.  This
                    476: ; has the effect of reflecting the coordinates through the line of slope
                    477: ; 1/2.  Note that the diagonal gets mapped into the x-axis after a half
                    478: ; flip.
                    479: ;
                    480: ; How Many Bits Do We Need, Anyway?
                    481: ; ---------------------------------
                    482: ;
                    483: ; Note that if the line is visible on your screen, you must light up
                    484: ; exactly the correct pixels, no matter where in the 28.4 x 28.4 device
                    485: ; space the end points of the line lie (meaning you must handle 32 bit
                    486: ; DDAs, though you can certainly have optimized cases for lesser DDAs).
                    487: ;
                    488: ; We move the origin to (floor(M0 / F), floor(N0 / F)), so when we
                    489: ; calculate gamma from (5), we know that 0 <= M0, N0 < F.  And we
                    490: ; are in the first octant, so dM >= dN.  Then we know that gamma can
                    491: ; be in the range [(-1/2)dM, (3/2)dM].  The DDI guarantees us that
                    492: ; valid lines will have dM and dN values at most 31 bits (unsigned)
                    493: ; of significance.  So gamma requires 33 bits of significance (we store
                    494: ; this as a 64 bit number for convenience).
                    495: ;
                    496: ; When running through the DDA loop, r  + dR can have a value in the
                    497: ;                                     j
                    498: ; range 0 <= r  + dR < 2 dN; thus the result must be a 32 bit unsigned value.
                    499: ;             j
                    500: ;
                    501: ; Testing Lines
                    502: ; -------------
                    503: ;
                    504: ; To be NT compliant, a display driver must exactly adhere to GIQ,
                    505: ; which means that for any given line, the driver must light exactly
                    506: ; the same pels as does GDI.  This can be tested using the Guiman tool
                    507: ; provided elsewhere in the DDK, and 'ZTest', which draws random lines
                    508: ; on the screen and to a bitmap, and compares the results.
                    509: ;
                    510: ; If You've Got Line Hardware
                    511: ; ---------------------------
                    512: ;
                    513: ; If your hardware already adheres to GIQ, you're all set.  Otherwise
                    514: ; you'll want to look at the S3 sample code and read the following:
                    515: ;
                    516: ; 1) You'll want to special case integer-only lines, since they require
                    517: ;    less processing time and are more common (CAD programs will probably
                    518: ;    only ever give integer lines).  GDI does not provide a flag saying
                    519: ;    that all lines in a path are integer lines; consequently, you will
                    520: ;    have to explicitly check every line.
                    521: ;
                    522: ; 2) You are required to correctly draw any line in the 28.4 device
                    523: ;    space that intersects the viewport.  If you have less than 32 bits
                    524: ;    of significance in the hardware for the Bresenham terms, extremely
                    525: ;    long lines would overflow the hardware.  For such (rare) cases, you
                    526: ;    can fall back to strip-drawing code, of which there is a C version in
                    527: ;    the S3's lines.cxx (or if your display is a frame buffer, fall back
                    528: ;    to the engine).
                    529: ;
                    530: ; 3) If you can explicitly set the Bresenham terms in your hardware, you
                    531: ;    can draw non-integer lines using the hardware.  If your hardware has
                    532: ;    'n' bits of precision, you can draw GIQ lines that are up to 2^(n-5)
                    533: ;    pels long (4 bits are required for the fractional part, and one bit is
                    534: ;    used as a sign bit).  Note that integer lines don't require the 4
                    535: ;    fractional bits, so if you special case them as in 1), you can do
                    536: ;    integer lines that are up to 2^(n - 1) pels long.  See the S3's
                    537: ;    fastline.asm for an example.
                    538: ;
                    539: ;-----------------------------------------------------------------------;
                    540: 
                    541: cProc   bLines,36,< \
                    542:     uses esi edi ebx,  \
                    543:     pdsurf:     ptr,   \
                    544:     pptfxFirst: ptr,   \
                    545:     pptfxBuf:   ptr,   \
                    546:     prun:       ptr,   \
                    547:     cptfx:      dword, \
                    548:     pls:        ptr,   \
                    549:     prclClip:   ptr,   \
                    550:     apfn:       ptr,   \
                    551:     flStart:    dword  >
                    552: 
                    553: ; pdsurf:     Surface data
                    554: ; pptfxFirst: Start point of first line
                    555: ; pptfxBuf:   All subsequent points
                    556: ; prun:       Array of runs if doing complex clipping
                    557: ; cptfx:      Number of points in pptfxBuf (i.e., # lines)
                    558: ; pls:        Line state
                    559: ; prclClip:   Clip rectangle if doing simple clipping
                    560: ; apfn:       Pointer to table of strip drawers
                    561: ; flStart:    Flags for all lines
                    562: 
                    563:         local cPelsAfterThisBank:    dword ; For bank switching
                    564:         local cStripsInNextRun:      dword ; For bank switching
                    565:         local pptfxBufEnd:           ptr   ; Last point in pptfxBuf
                    566:         local M0:                    dword ; Normalized x0 in device coords
                    567:         local dM:                    dword ; Delta-x in device coords
                    568:         local N0:                    dword ; Normalized y0 in device coords
                    569:         local dN:                    dword ; Delta-y in device coords
                    570:         local fl:                    dword ; Flags for current line
                    571:         local x:                     dword ; Normalized start pixel x-coord
                    572:         local y:                     dword ; Normalized start pixel y-coord
                    573:         local eqGamma_lo:            dword ; Lower 32 bits of Gamma
                    574:         local eqGamma_hi:            dword ; Upper 32 bits of Gamma
                    575:         local x0:                    dword ; Start pixel x-offset
                    576:         local y0:                    dword ; Start pixel y-offset
                    577:         local ulSlopeOneAdjustment:  dword ; Special offset if line of slope 1
                    578:         local cStylePels:            dword ; # of pixels in line (before clip)
                    579:         local xStart:                dword ; Start pixel x-offset before clip
                    580:         local pfn:                   ptr   ; Pointer to strip drawing function
                    581:         local cPels:                 dword ; # pixels to be drawn (after clip)
                    582:         local i:                     dword ; # pixels in strip
                    583:         local r:                     dword ; Remainder (or "error") term
                    584:         local d_I:                   dword ; Delta-I
                    585:         local d_R:                   dword ; Delta-R
                    586:         local plStripEnd:            ptr   ; Last strip in buffer
                    587:         local ptlStart[size POINTL]: byte  ; Unnormalized start coord
                    588:         local dN_Original:           dword ; dN before half-flip
                    589:         local xClipLeft:             dword ; Left side of clip rectangle
                    590:         local xClipRight:            dword ; Right side of clip rectangle
                    591:         local strip[size STRIPS]:    byte  ; Our strip buffer
                    592: 
                    593: ; Do some initializing:
                    594: 
                    595:         mov     ecx, cptfx
                    596:         mov     edx, pptfxBuf
                    597:         lea     eax, [edx + ecx * (size POINTL) - (size POINTL)]
                    598:         mov     pptfxBufEnd, eax        ; pptfxBufEnd is inclusive of end point
                    599: 
                    600:         mov     eax, [edx].ptl_x        ; Load up end point (M1, N1)
                    601:         mov     edi, [edx].ptl_y
                    602: 
                    603:         mov     edx, pptfxFirst         ; Load up start point (M0, N0)
                    604:         mov     esi, [edx].ptl_x
                    605:         mov     ecx, [edx].ptl_y
                    606: 
                    607:         mov     ebx, flStart
                    608: 
                    609: ;-----------------------------------------------------------------------;
                    610: ; Flip to the first octant.                                             ;
                    611: ;-----------------------------------------------------------------------;
                    612: 
                    613: ; Register state:       esi = M0
                    614: ;                       ecx = N0
                    615: ;                       eax = dM (M1)
                    616: ;                       edi = dN (N1)
                    617: ;                       ebx = fl
                    618: 
                    619: ; Make sure we go left to right:
                    620: 
                    621: the_main_loop:
                    622:         cmp     esi, eax
                    623:         jle     short is_left_to_right  ; skip if M0 <= M1
                    624:         xchg    esi, eax                ; swap M0, M1
                    625:         xchg    ecx, edi                ; swap N0, N1
                    626:         or      ebx, FL_FLIP_H
                    627: 
                    628: is_left_to_right:
                    629: 
                    630: ; Compute the deltas, remembering that the DDI says we should get
                    631: ; deltas less than 2^31.  If we get more, we ensure we don't crash
                    632: ; later on by simply skipping the line:
                    633: 
                    634:         sub     eax, esi                ; eax = dM
                    635:         jo      next_line               ; dM must be less than 2^31
                    636:         sub     edi, ecx                ; edi = dN
                    637:         jo      next_line               ; dN must be less than 2^31
                    638: 
                    639:         jge     short is_top_to_bottom  ; skip if dN >= 0
                    640:         neg     ecx                     ; N0 = -N0
                    641:         neg     edi                     ; dN = -dN (edi already holds dN)
                    642:         or      ebx, FL_FLIP_V
                    643: 
                    644: is_top_to_bottom:
                    645:         cmp     edi, eax
                    646:         jb      short done_flips        ; skip if dN < dM
                    647:         jne     short slope_more_than_one
                    648: 
                    649: ; We must special case slopes of one (because of our rounding convention):
                    650: 
                    651:         or      ebx, FL_FLIP_SLOPE_ONE
                    652:         jmp     short done_flips
                    653: 
                    654: slope_more_than_one:
                    655:         xchg    eax, edi                ; swap dM, dN
                    656:         xchg    esi, ecx                ; swap M0, N0
                    657:         or      ebx, FL_FLIP_D
                    658: 
                    659: done_flips:
                    660: 
                    661:         mov     edx, ebx
                    662:         and     edx, FL_ROUND_MASK
                    663:         .errnz  FL_ROUND_SHIFT - 2
                    664:         or      ebx, [gaflRoundTable + edx]  ; get our rounding flags
                    665: 
                    666:         mov     dM, eax                 ; save some info
                    667:         mov     dN, edi
                    668:         mov     fl, ebx
                    669: 
                    670: ; We're going to shift our origin so that it's at the closest integer
                    671: ; coordinate to the left/above our fractional start point (it makes
                    672: ; the math quicker):
                    673: 
                    674:         mov     edx, esi                ; x = LFLOOR(M0)
                    675:         sar     edx, FLOG2
                    676:         mov     x, edx
                    677: 
                    678:         mov     edx, ecx                ; y = LFLOOR(N0)
                    679:         sar     edx, FLOG2
                    680:         mov     y, edx
                    681: 
                    682: ;-----------------------------------------------------------------------;
                    683: ; Compute the fractional remainder term                                 ;
                    684: ;-----------------------------------------------------------------------;
                    685: 
                    686: ; By shifting the origin we've contrived to eliminate the integer
                    687: ; portion of our fractional start point, giving us start point
                    688: ; fractional coordinates in the range [0, F - 1]:
                    689: 
                    690:         and     esi, F - 1              ; M0 = FXFRAC(M0)
                    691:         and     ecx, F - 1              ; N0 = FXFRAC(N0)
                    692: 
                    693: ; We now compute Gamma:
                    694: 
                    695:         mov     M0, esi                 ; save M0, N0 for later
                    696:         mov     N0, ecx
                    697: 
                    698:         lea     edx, [ecx + F/2]
                    699:         mul     edx                     ; [edx:eax] = dM * (N0 + F/2)
                    700:         xchg    eax, edi
                    701:         mov     ecx, edx                ; [ecx:edi] = dM * (N0 + F/2)
                    702:                                         ; (we just nuked N0)
                    703: 
                    704:         mul     esi                     ; [edx:eax] = dN * M0
                    705: 
                    706: ; Now gamma = dM * (N0 + F/2) - dN * M0 - bRoundDown
                    707: 
                    708:         .errnz  FL_V_ROUND_DOWN - 8000h
                    709:         ror     bh, 8
                    710:         sbb     edi, eax
                    711:         sbb     ecx, edx
                    712: 
                    713:         shrd    edi, ecx, FLOG2
                    714:         sar     ecx, FLOG2              ; gamma = [ecx:edi] >>= 4
                    715: 
                    716:         mov     eqGamma_hi, ecx
                    717:         mov     eqGamma_lo, edi
                    718: 
                    719:         mov     eax, N0
                    720: 
                    721: ; Register state:
                    722: ;                       eax = N0
                    723: ;                       ebx = fl
                    724: ;                       ecx = eqGamma_hi
                    725: ;                       edx = garbage
                    726: ;                       esi = M0
                    727: ;                       edi = eqGamma_lo
                    728: 
                    729:         testb   ebx, FL_FLIP_H
                    730:         jnz     line_runs_right_to_left
                    731: 
                    732: ;-----------------------------------------------------------------------;
                    733: ; Figure out which pixels are at the ends of a left-to-right line.      ;
                    734: ;                               -------->                               ;
                    735: ;-----------------------------------------------------------------------;
                    736: 
                    737:         public line_runs_left_to_right
                    738: line_runs_left_to_right:
                    739:         or      esi, esi
                    740:         jz      short LtoR_check_slope_one
                    741:                                         ; skip ahead if M0 == 0
                    742:                                         ;   (in that case, x0 = 0 which is to be
                    743:                                         ;   kept in esi, and is already
                    744:                                         ;   conveniently zero)
                    745: 
                    746:         or      eax, eax
                    747:         jnz     short LtoR_N0_not_zero
                    748: 
                    749:         .errnz  FL_H_ROUND_DOWN - 80h
                    750:         ror     bl, 8
                    751:         sbb     esi, -F/2
                    752:         shr     esi, FLOG2
                    753:         jmp     short LtoR_check_slope_one
                    754:                                         ; esi = x0 = rounded M0
                    755: 
                    756: LtoR_N0_not_zero:
                    757:         sub     eax, F/2
                    758:         sbb     edx, edx
                    759:         xor     eax, edx
                    760:         sub     eax, edx
                    761:         cmp     esi, eax
                    762:         sbb     esi, esi
                    763:         inc     esi                     ; esi = x0 = (abs(N0 - F/2) <= M0)
                    764: 
                    765:         public  LtoR_check_slope_one
                    766: LtoR_check_slope_one:
                    767:         mov     ulSlopeOneAdjustment, 0
                    768:         mov     eax, ebx
                    769:         and     eax, FL_FLIP_SLOPE_ONE + FL_H_ROUND_DOWN
                    770:         cmp     eax, FL_FLIP_SLOPE_ONE + FL_H_ROUND_DOWN
                    771:         jne     short LtoR_compute_y0_from_x0
                    772: 
                    773: ; We have to special case lines that are exactly of slope 1 or -1:
                    774: 
                    775:         mov     eax, N0
                    776:         add     eax, dN
                    777:         and     eax, F - 1              ; eax = N1
                    778:         jz      short LtoR_slope_one_check_start_point
                    779: 
                    780:         mov     edx, M0
                    781:         add     edx, dM
                    782:         and     edx, F - 1              ; edx = M1
                    783: 
                    784:         add     eax, F/2
                    785:         cmp     edx, eax                ; cmp M1, N1 + F/2
                    786:         jne     short LtoR_slope_one_check_start_point
                    787:         mov     ulSlopeOneAdjustment, -1
                    788: 
                    789: LtoR_slope_one_check_start_point:
                    790:         mov     eax, M0
                    791:         or      eax, eax
                    792:         jz      short LtoR_compute_y0_from_x0
                    793: 
                    794:         add     eax, F/2
                    795:         cmp     eax, N0                 ; cmp M0 + 8, N0
                    796:         jne     short LtoR_compute_y0_from_x0
                    797: 
                    798:         xor     esi, esi                ; x0 = 0
                    799: 
                    800: LtoR_compute_y0_from_x0:
                    801: 
                    802: ; ecx = eqGamma_hi
                    803: ; esi = x0
                    804: ; edi = eqGamma_lo
                    805: 
                    806:         mov     eax, dN
                    807:         mov     edx, dM
                    808: 
                    809:         mov     x0, esi
                    810:         mov     y0, 0
                    811:         cmp     ecx, 0
                    812:         jl      short LtoR_compute_x1
                    813: 
                    814:         neg     esi
                    815:         and     esi, eax
                    816:         sub     edx, esi
                    817:         cmp     edi, edx
                    818:         mov     edx, dM
                    819:         jl      short LtoR_compute_x1
                    820:         mov     y0, 1                   ; y0 = floor((dN * x0 + eqGamma) / dM)
                    821: 
                    822: LtoR_compute_x1:
                    823: 
                    824: ; Register state:
                    825: ;                       eax = dN
                    826: ;                       ebx = fl
                    827: ;                       ecx = garbage
                    828: ;                       edx = dM
                    829: ;                       esi = garbage
                    830: ;                       edi = garbage
                    831: 
                    832:         mov     esi, M0
                    833:         add     esi, edx
                    834:         mov     ecx, esi
                    835:         shr     esi, FLOG2
                    836:         dec     esi                     ; x1 = ((M0 + dM) >> 4) - 1
                    837:         add     esi, ulSlopeOneAdjustment
                    838:         and     ecx, F-1                ; M1 = (M0 + dM) & 15
                    839:         jz      done_first_pel_last_pel
                    840: 
                    841:         add     eax, N0
                    842:         and     eax, F-1                ; N1 = (N0 + dN) & 15
                    843:         jnz     short LtoR_N1_not_zero
                    844: 
                    845:         .errnz  FL_H_ROUND_DOWN - 80h
                    846:         ror     bl, 8
                    847:         sbb     ecx, -F/2
                    848:         shr     ecx, FLOG2              ; ecx = LROUND(M1, fl & FL_H_ROUND_DOWN)
                    849:         add     esi, ecx
                    850:         jmp     done_first_pel_last_pel
                    851: 
                    852: LtoR_N1_not_zero:
                    853:         sub     eax, F/2
                    854:         sbb     edx, edx
                    855:         xor     eax, edx
                    856:         sub     eax, edx
                    857:         cmp     eax, ecx
                    858:         jg      done_first_pel_last_pel
                    859:         inc     esi
                    860:         jmp     done_first_pel_last_pel
                    861: 
                    862: ;-----------------------------------------------------------------------;
                    863: ; Figure out which pixels are at the ends of a right-to-left line.      ;
                    864: ;                               <--------                               ;
                    865: ;-----------------------------------------------------------------------;
                    866: 
                    867: ; Compute x0:
                    868: 
                    869:         public  line_runs_right_to_left
                    870: line_runs_right_to_left:
                    871:         mov     x0, 1                   ; x0 = 1
                    872:         or      eax, eax
                    873:         jnz     short RtoL_N0_not_zero
                    874: 
                    875:         xor     edx, edx                ; ulDelta = 0
                    876:         .errnz  FL_H_ROUND_DOWN - 80h
                    877:         ror     bl, 8
                    878:         sbb     esi, -F/2
                    879:         shr     esi, FLOG2              ; esi = LROUND(M0, fl & FL_H_ROUND_DOWN)
                    880:         jz      short RtoL_check_slope_one
                    881: 
                    882:         mov     x0, 2
                    883:         mov     edx, dN
                    884:         jmp     short RtoL_check_slope_one
                    885: 
                    886: RtoL_N0_not_zero:
                    887:         sub     eax, F/2
                    888:         sbb     edx, edx
                    889:         xor     eax, edx
                    890:         sub     eax, edx
                    891:         add     eax, esi                ; eax = ABS(N0 - F/2) + M0
                    892:         xor     edx, edx                ; ulDelta = 0
                    893:         cmp     eax, F
                    894:         jle     short RtoL_check_slope_one
                    895: 
                    896:         mov     x0, 2                   ; x0 = 2
                    897:         mov     edx, dN                 ; ulDelta = dN
                    898: 
                    899:         public  RtoL_check_slope_one
                    900: RtoL_check_slope_one:
                    901:         mov     ulSlopeOneAdjustment, 0
                    902:         mov     eax, ebx
                    903:         and     eax, FL_FLIP_SLOPE_ONE + FL_H_ROUND_DOWN
                    904:         cmp     eax, FL_FLIP_SLOPE_ONE
                    905:         jne     short RtoL_compute_y0_from_x0
                    906: 
                    907: ; We have to special case lines that are exactly of slope 1 or -1:
                    908: 
                    909:         mov     eax, N0
                    910:         add     eax, dN
                    911:         and     eax, F - 1              ; eax = N1
                    912:         jz      short RtoL_slope_one_check_start_point
                    913: 
                    914:         mov     esi, M0
                    915:         add     esi, dM
                    916:         and     esi, F - 1              ; esi = M1
                    917: 
                    918:         add     eax, F/2
                    919:         cmp     esi, eax                ; cmp M1, N1 + F/2
                    920:         jne     short RtoL_slope_one_check_start_point
                    921:         mov     ulSlopeOneAdjustment, 1
                    922: 
                    923: RtoL_slope_one_check_start_point:
                    924:         mov     eax, M0
                    925:         or      eax, eax
                    926:         jz      short RtoL_compute_y0_from_x0
                    927: 
                    928:         add     eax, F/2
                    929:         cmp     eax, N0                 ; cmp M0 + 8, N0
                    930:         jne     short RtoL_compute_y0_from_x0
                    931: 
                    932:         mov     x0, 2                   ; x0 = 2
                    933:         mov     edx, dN                 ; ulDelta = dN
                    934: 
                    935: RtoL_compute_y0_from_x0:
                    936: 
                    937: ; eax = garbage
                    938: ; ebx = fl
                    939: ; ecx = eqGamma_hi
                    940: ; edx = ulDelta
                    941: ; esi = garbage
                    942: ; edi = eqGamma_lo
                    943: 
                    944:         mov     eax, dN                 ; eax = dN
                    945:         mov     y0, 0                   ; y0 = 0
                    946: 
                    947:         add     edi, edx
                    948:         adc     ecx, 0                  ; eqGamma += ulDelta
                    949:                                         ; NOTE: Setting flags here!
                    950:         mov     edx, dM                 ; edx = dM
                    951:         jl      short RtoL_compute_x1   ; NOTE: Looking at the flags here!
                    952:         jg      short RtoL_y0_is_2
                    953: 
                    954:         lea     ecx, [edx + edx]
                    955:         sub     ecx, eax                ; ecx = 2 * dM - dN
                    956:         cmp     edi, ecx
                    957:         jge     short RtoL_y0_is_2
                    958: 
                    959:         sub     ecx, edx                ; ecx = dM - dN
                    960:         cmp     edi, ecx
                    961:         jl      short RtoL_compute_x1
                    962: 
                    963:         mov     y0, 1
                    964:         jmp     short RtoL_compute_x1
                    965: 
                    966: RtoL_y0_is_2:
                    967:         mov     y0, 2
                    968: 
                    969: RtoL_compute_x1:
                    970: 
                    971: ; Register state:
                    972: ;                       eax = dN
                    973: ;                       ebx = fl
                    974: ;                       ecx = garbage
                    975: ;                       edx = dM
                    976: ;                       esi = garbage
                    977: ;                       edi = garbage
                    978: 
                    979:         mov     esi, M0
                    980:         add     esi, edx
                    981:         mov     ecx, esi
                    982:         shr     esi, FLOG2              ; x1 = (M0 + dM) >> 4
                    983:         add     esi, ulSlopeOneAdjustment
                    984:         and     ecx, F-1                ; M1 = (M0 + dM) & 15
                    985: 
                    986:         add     eax, N0
                    987:         and     eax, F-1                ; N1 = (N0 + dN) & 15
                    988:         jnz     short RtoL_N1_not_zero
                    989: 
                    990:         .errnz  FL_H_ROUND_DOWN - 80h
                    991:         ror     bl, 8
                    992:         sbb     ecx, -F/2
                    993:         shr     ecx, FLOG2              ; ecx = LROUND(M1, fl & FL_H_ROUND_DOWN)
                    994:         add     esi, ecx
                    995:         jmp     done_first_pel_last_pel
                    996: 
                    997: RtoL_N1_not_zero:
                    998:         sub     eax, F/2
                    999:         sbb     edx, edx
                   1000:         xor     eax, edx
                   1001:         sub     eax, edx
                   1002:         add     eax, ecx                ; eax = ABS(N1 - F/2) + M1
                   1003:         cmp     eax, F+1
                   1004:         sbb     esi, -1
                   1005: 
                   1006: done_first_pel_last_pel:
                   1007: 
                   1008: ; Register state:
                   1009: ;                       eax = garbage
                   1010: ;                       ebx = fl
                   1011: ;                       ecx = garbage
                   1012: ;                       edx = garbage
                   1013: ;                       esi = x1
                   1014: ;                       edi = garbage
                   1015: 
                   1016:         mov     ecx, x0
                   1017:         lea     edx, [esi + 1]
                   1018:         sub     edx, ecx                ; edx = x1 - x0 + 1
                   1019: 
                   1020:         jle     next_line
                   1021:         mov     cStylePels, edx
                   1022:         mov     xStart, ecx
                   1023: 
                   1024: ;-----------------------------------------------------------------------;
                   1025: ; See if clipping or styling needs to be done.                          ;
                   1026: ;-----------------------------------------------------------------------;
                   1027: 
                   1028:         testb   ebx, FL_CLIP
                   1029:         jnz     do_some_clipping
                   1030: 
                   1031: ; Register state:
                   1032: ;                       eax = garbage
                   1033: ;                       ebx = fl
                   1034: ;                       ecx = x0        (stack variable correct too)
                   1035: ;                       edx = garbage
                   1036: ;                       esi = x1
                   1037: ;                       edi = garbage
                   1038: 
                   1039: done_clipping:
                   1040:         mov     eax, y0
                   1041: 
                   1042:         sub     esi, ecx
                   1043:         inc     esi                     ; esi = cPels = x1 - x0 + 1
                   1044:         mov     cPels, esi
                   1045: 
                   1046:         mov     esi, pdsurf
                   1047:         add     ecx, x                  ; ecx = ptlStart.ptl_x
                   1048:         add     eax, y                  ; eax = ptlStart.ptl_y
                   1049: 
                   1050:         mov     esi, [esi].dsurf_lNextScan ; we'll compute the sign of lNextScan
                   1051: 
                   1052:         testb   ebx, FL_FLIP_D
                   1053:         jz      short do_v_unflip
                   1054:         xchg    ecx, eax
                   1055: 
                   1056: do_v_unflip:
                   1057:         testb   ebx, FL_FLIP_V
                   1058:         jz      short done_unflips
                   1059:         neg     eax
                   1060:         neg     esi
                   1061: 
                   1062: done_unflips:
                   1063:         mov     strip.ST_lNextScan, esi ; lNextScan now right for y-direction
                   1064:         testb   ebx, FL_STYLED
                   1065:         jnz     do_some_styling
                   1066: 
                   1067: done_styling:
                   1068:         lea     edx, [strip.ST_alStrips + (STRIP_MAX * 4)]
                   1069:         mov     plStripEnd, edx
                   1070: 
                   1071:         mov     cPelsAfterThisBank, 0
                   1072:         mov     cStripsInNextRun, 7fffffffh
                   1073: 
                   1074:         testb   ebx, FL_PHYSICAL_DEVICE
                   1075:         jz      done_bank_setup
                   1076: 
                   1077: ;-----------------------------------------------------------------------;
                   1078: ; Do banking setup.                                                     ;
                   1079: ;-----------------------------------------------------------------------;
                   1080: 
                   1081:         public  bank_setup
                   1082: bank_setup:
                   1083: 
                   1084: ; Register state:
                   1085: ;                       eax = ptlStart.ptl_y
                   1086: ;                       ebx = fl
                   1087: ;                       ecx = ptlStart.ptl_x
                   1088: ;                       edx = garbage
                   1089: ;                       esi = garbage
                   1090: ;                       edi = garbage
                   1091: 
                   1092:         mov     esi, pdsurf
                   1093:         cmp     eax, [esi].dsurf_rcl1WindowClip.yTop
                   1094:         jl      short bank_get_initial_bank   ; ptlStart.y < rcl1WindowClip.yTop
                   1095: 
                   1096:         cmp     eax, [esi].dsurf_rcl1WindowClip.yBottom
                   1097:         jl      short bank_got_initial_bank   ; ptlStart.y < rcl1WindowClip.yBot
                   1098: 
                   1099: bank_get_initial_bank:
                   1100:         mov     ptlStart.ptl_y, eax     ; Save ptlStart.ptl_y
                   1101:         mov     edi, ecx                ; Save ptlStart.ptl_x
                   1102: 
                   1103:         .errnz  JustifyTop
                   1104:         .errnz  JustifyBottom - 1
                   1105:         .errnz  FL_FLIP_V - 8
                   1106: 
                   1107:         mov     ecx, ebx                ; JustifyTop if line goes down,
                   1108:         shr     ecx, 3                  ; JustifyBottom if line goes up
                   1109:         and     ecx, 1
                   1110: 
                   1111: bank_justified:
                   1112:         ptrCall <dword ptr [esi].dsurf_pfnBankControl>, \
                   1113:                 <esi, eax, ecx>
                   1114: 
                   1115:         mov     eax, ptlStart.ptl_y
                   1116:         mov     ecx, edi
                   1117: 
                   1118: bank_got_initial_bank:
                   1119:         testb   ebx, FL_FLIP_D
                   1120:         jz      short bank_major_x
                   1121: 
                   1122: bank_major_y:
                   1123:         testb   ebx, FL_FLIP_V
                   1124:         jz      short bank_major_y_down
                   1125: bank_major_y_up:
                   1126:         lea     edi, [eax + 1]
                   1127:         sub     edi, [esi].dsurf_rcl1WindowClip.yTop
                   1128:         jmp     short bank_done_y_major
                   1129: bank_major_y_down:
                   1130:         mov     edi, [esi].dsurf_rcl1WindowClip.yBottom
                   1131:         sub     edi, eax
                   1132: bank_done_y_major:
                   1133:         mov     esi, cPels
                   1134:         sub     esi, edi                ; edi = cPelsInBank
                   1135:         mov     cPelsAfterThisBank, esi
                   1136:         jle     short done_bank_setup
                   1137:         mov     cPels, edi
                   1138:         jmp     short done_bank_setup
                   1139: 
                   1140: bank_major_x:
                   1141:         mov     edi, dN
                   1142:         shr     edi, FLOG2
                   1143:         add     edi, y
                   1144: 
                   1145: ; We're guessing at the y-position of the end pixel (it's too much work
                   1146: ; to compute the actual value) to see if the line spans more than one
                   1147: ; bank.  We have to add at least a slop value of '3' because the actual
                   1148: ; start pixel may be up to 2 off from 'y' because of end-pixel exclusiveness,
                   1149: ; and we have to add 1 more because we're taking the floor of (dN / F), to
                   1150: ; account for rounding:
                   1151: 
                   1152:         add     edi, 3                  ; yEnd = edi = y + LFLOOR(dN) + 3
                   1153:         testb   ebx, FL_FLIP_V
                   1154:         jz      short bank_major_x_down
                   1155: bank_major_x_up:
                   1156:         mov     edx, 1
                   1157:         sub     edx, [esi].dsurf_rcl1WindowClip.yTop    ; edx = -yNextBankStart
                   1158: 
                   1159:         cmp     edi, edx
                   1160:         lea     edx, [edx + eax]        ; edx = cStripsInNextRun
                   1161:         jl      short bank_major_x_done
                   1162: 
                   1163: ; Line may go over bank boundary, so don't do a half flip:
                   1164: 
                   1165:         or      ebx, FL_DONT_DO_HALF_FLIP
                   1166:         jmp     short bank_major_x_done
                   1167: 
                   1168: bank_major_x_down:
                   1169:         mov     esi, [esi].dsurf_rcl1WindowClip.yBottom  ; esi = yNextBankStart
                   1170: 
                   1171:         mov     edx, esi
                   1172:         sub     edx, eax                ; edx = cStripsInNextRun
                   1173: 
                   1174:         cmp     edi, esi
                   1175:         jl      short bank_major_x_done
                   1176:         or      ebx, FL_DONT_DO_HALF_FLIP
                   1177: 
                   1178: bank_major_x_done:
                   1179:         sub     edx, STRIP_MAX
                   1180:         mov     cStripsInNextRun, edx
                   1181:         jge     short done_bank_setup
                   1182: 
                   1183:         lea     edx, [strip.ST_alStrips + edx * 4 + (STRIP_MAX * 4)]
                   1184:         mov     plStripEnd, edx
                   1185: 
                   1186: done_bank_setup:
                   1187: 
                   1188: ;-----------------------------------------------------------------------;
                   1189: ; Setup to do DDA.                                                      ;
                   1190: ;-----------------------------------------------------------------------;
                   1191: 
                   1192: ; Register state:
                   1193: ;                       eax = ptlStart.ptl_y
                   1194: ;                       ebx = fl
                   1195: ;                       ecx = ptlStart.ptl_x
                   1196: ;                       edx = garbage
                   1197: ;                       esi = garbage
                   1198: ;                       edi = garbage
                   1199: 
                   1200:         mov     edx, 80h
                   1201:         ror     dl, cl
                   1202:         mov     strip.ST_jBitMask, dl   ; ST_jBitMask =
                   1203:                                         ;     (0x80 >> (ptlStart.ptl_x & 0x7))
                   1204: 
                   1205:         mov     esi, pdsurf
                   1206:         mov     edi, eax                ; Now edi = ptlStart.ptl_y
                   1207:         imul    [esi].dsurf_lNextScan
                   1208:         add     eax, [esi].dsurf_pvBitmapStart
                   1209:         sar     ecx, 3
                   1210:         add     eax, ecx
                   1211:         mov     strip.ST_pjScreen, eax  ; ST_pjScreen = pchBits + ptlStart.ptl_y *
                   1212:                                         ;     cjDelta + (ptlStart.ptl_x >> 3)
                   1213: 
                   1214:         mov     eax, dM
                   1215:         mov     ecx, dN
                   1216:         mov     esi, eqGamma_lo
                   1217:         mov     edi, eqGamma_hi
                   1218: 
                   1219: ; Register state:
                   1220: ;                       eax = dM
                   1221: ;                       ebx = fl
                   1222: ;                       ecx = dN
                   1223: ;                       edx = garbage
                   1224: ;                       esi = eqGamma_lo
                   1225: ;                       edi = eqGamma_hi
                   1226: 
                   1227:         lea     edx, [ecx + ecx]        ; if (2 * dN > dM)
                   1228:         cmp     edx, eax
                   1229:         mov     edx, y0                 ; Load y0 again
                   1230:         jbe     short after_half_flip
                   1231: 
                   1232:         test    ebx, (FL_STYLED + FL_DONT_DO_HALF_FLIP)
                   1233:         jnz     short after_half_flip
                   1234: 
                   1235:         or      ebx, FL_FLIP_HALF
                   1236:         mov     fl, ebx
                   1237: 
                   1238: ; Do a half flip!
                   1239: 
                   1240:         not     esi
                   1241:         not     edi
                   1242:         add     esi, eax
                   1243:         adc     edi, 0                  ; eqGamma = -eqGamma - 1 + dM
                   1244: 
                   1245:         neg     ecx
                   1246:         add     ecx, eax                ; dN = dM - dN
                   1247: 
                   1248:         neg     edx
                   1249:         add     edx, x0                 ; y0 = x0 - y0
                   1250: 
                   1251: after_half_flip:
                   1252:         mov     strip.ST_flFlips, ebx
                   1253:         and     ebx, FL_STRIP_MASK
                   1254: 
                   1255:         .errnz  FL_STRIP_SHIFT
                   1256:         mov     eax, apfn
                   1257:         lea     eax, [eax + ebx * 4]
                   1258:         mov     eax, [eax]
                   1259:         mov     pfn, eax
                   1260:         mov     eax, dM
                   1261: 
                   1262: ; Register state:
                   1263: ;                       eax = dM
                   1264: ;                       ebx = garbage
                   1265: ;                       ecx = dN
                   1266: ;                       edx = y0
                   1267: ;                       esi = eqGamma_lo
                   1268: ;                       edi = eqGamma_hi
                   1269: 
                   1270:         or      ecx, ecx
                   1271:         jz      short zero_slope
                   1272: 
                   1273: compute_dda_stuff:
                   1274:         inc     edx
                   1275:         mul     edx
                   1276:         stc                             ; set the carry to accomplish -1
                   1277:         sbb     eax, esi
                   1278:         sbb     edx, edi                ; (y0 + 1) * dM - eqGamma - 1
                   1279:         div     ecx
                   1280: 
                   1281:         mov     esi, eax                ; esi = i
                   1282:         mov     edi, edx                ; edi = r
                   1283: 
                   1284:         xor     edx, edx
                   1285:         mov     eax, dM
                   1286:         div     ecx                     ; edx = d_R, eax = d_I
                   1287:         mov     d_I, eax
                   1288: 
                   1289:         sub     esi, x0
                   1290:         inc     esi
                   1291: 
                   1292: done_dda_stuff:
                   1293:         lea     eax, [strip.ST_alStrips]
                   1294:         mov     ebx, cPels
                   1295: 
                   1296: ;-----------------------------------------------------------------------;
                   1297: ; Do our main DDA loop.                                                 ;
                   1298: ;-----------------------------------------------------------------------;
                   1299: 
                   1300:         sub     edi, ecx                ; offset remainder term from [0..dN)
                   1301:                                         ;   to [-dN..0) so test in inner
                   1302:                                         ;   loop is quicker
                   1303:         align   4
                   1304: 
                   1305: ; Register state:
                   1306: ;                       eax = plStrip   ; current pointer into strip array
                   1307: ;                       ebx = cPels     ; total number of pels in line
                   1308: ;                       ecx = dN        ; delta-N = rise in line
                   1309: ;                       edx = d_R       ; d_I + d_R/dN = exact strip length
                   1310: ;                       esi = i         ; length of current strip
                   1311: ;                       edi = r         ; remainder term for current strip
                   1312: ;                                       ;   in range [-dN..0)
                   1313: 
                   1314:         public  dda_loop
                   1315: dda_loop:
                   1316:         sub     ebx, esi                ; subtract strip length from line length
                   1317:         jle     final_strip             ; if zero or negative, done with line
                   1318: 
                   1319:         mov     [eax], esi              ; write strip length to strip array
                   1320:         add     eax, 4
                   1321:         cmp     plStripEnd, eax         ; is the strip array buffer full?
                   1322:         jbe     short output_strips     ; if so, empty it
                   1323: 
                   1324: ; The output_strips routine jumps to here when done:
                   1325: 
                   1326: done_output_strips:
                   1327:         mov     esi, d_I                ; our normal strip length
                   1328:         add     edi, edx                ; adjust our remainder term
                   1329:         jl      short dda_loop
                   1330: 
                   1331:         sub     edi, ecx                ; our remainder became 1 or more, so
                   1332:         inc     esi                     ;   we increment this strip length
                   1333:                                         ;   and adjust the remainder term
                   1334: 
                   1335: ; We've unrolled our loop a bit, so this should look familiar to the above:
                   1336: 
                   1337:         sub     ebx, esi                ; subtract strip length from line length
                   1338:         jle     final_strip             ; if zero or negative, done with line
                   1339: 
                   1340:         mov     [eax], esi              ; write strip length to strip array
                   1341:         add     eax, 4                  ; adjust strip pointer
                   1342: 
                   1343: ; Note that banking requires us to check if the strip array is full here
                   1344: ; too (and note that if output_strips is called it will return to
                   1345: ; done_output_strips):
                   1346: 
                   1347:         cmp     plStripEnd, eax
                   1348:         jbe     short output_strips
                   1349: 
                   1350:         mov     esi, d_I                ; our normal strip length
                   1351:         add     edi, edx                ; adjust our remainder term
                   1352:         jl      short dda_loop
                   1353: 
                   1354:         sub     edi, ecx                ; our remainder became 1 or more, so
                   1355:         inc     esi                     ; adjust
                   1356:         jmp     short dda_loop
                   1357: 
                   1358: zero_slope:
                   1359:         mov     esi, 7fffffffh
                   1360:         jmp     short done_dda_stuff
                   1361: 
                   1362: ;-----------------------------------------------------------------------;
                   1363: ; Empty strips buffer & possibly do x-major bank switch.                ;
                   1364: ;-----------------------------------------------------------------------;
                   1365: 
                   1366: output_strips:
                   1367:         mov     d_R, edx
                   1368:         mov     cPels, ebx
                   1369:         mov     i, esi
                   1370:         mov     r, edi
                   1371:         mov     dN, ecx
                   1372: 
                   1373:         lea     edx, [strip]
                   1374:         mov     ecx, pls
                   1375: 
                   1376: ; Call our strip routine:
                   1377: 
                   1378:         ptrCall <dword ptr pfn>, \
                   1379:                 <edx, ecx, eax>
                   1380: 
                   1381: ; It may be that we ran out of room in our strips buffer, and don't
                   1382: ; actually have to switch banks.  See if that's the case:
                   1383: 
                   1384:         mov     eax, cStripsInNextRun
                   1385:         or      eax, eax
                   1386:         jg      short done_strip_bank_switch
                   1387: 
                   1388: ; We have to switch banks.  See if we're going up or down:
                   1389: 
                   1390:         mov     esi, pdsurf
                   1391:         test    fl, FL_FLIP_V
                   1392:         jz      short bank_x_down
                   1393: 
                   1394: bank_x_up:
                   1395:         mov     edi, strip.ST_pjScreen
                   1396:         sub     edi, [esi].dsurf_pvBitmapStart
                   1397:         mov     ebx, [esi].dsurf_rcl1WindowClip.yTop
                   1398:         dec     ebx                     ; we want yTop - 1 to be mapped in
                   1399: 
                   1400: ; Map in the next higher bank:
                   1401: 
                   1402:         ptrCall <dword ptr [esi].dsurf_pfnBankControl>, \
                   1403:                 <esi, ebx, JustifyBottom>; ebx, esi and edi are preserved
                   1404: 
                   1405:         lea     eax, [ebx + 1]
                   1406:         sub     eax, [esi].dsurf_rcl1WindowClip.yTop
                   1407:                                         ; eax = # of scans can do in bank
                   1408: 
                   1409:         add     edi, [esi].dsurf_pvBitmapStart
                   1410:         mov     strip.ST_pjScreen, edi
                   1411: 
                   1412:         jmp     short done_strip_bank_switch
                   1413: 
                   1414: bank_x_down:
                   1415:         mov     edi, strip.ST_pjScreen
                   1416:         sub     edi, [esi].dsurf_pvBitmapStart
                   1417:         mov     ebx, [esi].dsurf_rcl1WindowClip.yBottom
                   1418: 
                   1419: ; Map in the next lower bank:
                   1420: 
                   1421:         ptrCall <dword ptr [esi].dsurf_pfnBankControl>, \
                   1422:                 <esi, ebx, JustifyTop>  ; ebx, esi and edi are preserved
                   1423: 
                   1424:         mov     eax, [esi].dsurf_rcl1WindowClip.yBottom
                   1425:         sub     eax, ebx                ; eax = # scans can do in bank
                   1426: 
                   1427:         add     edi, [esi].dsurf_pvBitmapStart
                   1428:         mov     strip.ST_pjScreen,edi
                   1429: 
                   1430: done_strip_bank_switch:
                   1431: 
                   1432: ; eax = cStripsInNextRun
                   1433: 
                   1434:         lea     edx, [strip.ST_alStrips + (STRIP_MAX * 4)]
                   1435:         sub     eax, STRIP_MAX
                   1436:         mov     cStripsInNextRun, eax
                   1437:         jge     short get_ready_for_more_strips
                   1438:         lea     edx, [edx + eax * 4]
                   1439: 
                   1440: get_ready_for_more_strips:
                   1441:         mov     plStripEnd, edx
                   1442: 
                   1443:         mov     esi, i
                   1444:         mov     edi, r
                   1445:         mov     ebx, cPels
                   1446:         mov     edx, d_R
                   1447:         mov     ecx, dN
                   1448:         lea     eax, [strip.ST_alStrips]
                   1449:         jmp     done_output_strips
                   1450: 
                   1451: ;-----------------------------------------------------------------------;
                   1452: ; Empty strips buffer.  Either get new line or do y-major bank switch.  ;
                   1453: ;-----------------------------------------------------------------------;
                   1454: 
                   1455: final_strip:
                   1456:         add     ebx, esi
                   1457:         mov     [eax], ebx
                   1458:         add     eax, 4
                   1459: 
                   1460:         cmp     cPelsAfterThisBank, 0
                   1461:         jg      short bank_y_major
                   1462: 
                   1463: very_final_strip:
                   1464:         lea     edx, [strip]
                   1465:         mov     ecx, pls
                   1466: 
                   1467:         ptrCall <dword ptr pfn>, \
                   1468:                 <edx, ecx, eax>
                   1469: 
                   1470: ; NOTE: next_line is jumped to from various places, and it cannot assume
                   1471: ;       any registers are loaded.
                   1472: 
                   1473: next_line:
                   1474:         mov     ebx, flStart
                   1475:         testb   ebx, FL_COMPLEX_CLIP
                   1476:         jnz     short see_if_done_complex_clipping
                   1477: 
                   1478:         mov     edx, pptfxBuf
                   1479:         cmp     edx, pptfxBufEnd
                   1480:         je      short all_done
                   1481: 
                   1482:         mov     esi, [edx].ptl_x
                   1483:         mov     ecx, [edx].ptl_y
                   1484:         add     edx, size POINTL
                   1485:         mov     pptfxBuf, edx
                   1486:         mov     eax, [edx].ptl_x
                   1487:         mov     edi, [edx].ptl_y
                   1488:         jmp     the_main_loop
                   1489: 
                   1490: all_done:
                   1491:         mov     eax, 1
                   1492: 
                   1493:         cRet    bLines
                   1494: 
                   1495: see_if_done_complex_clipping:
                   1496:         mov     ebx, fl
                   1497:         dec     cptfx
                   1498:         jz      short all_done
                   1499: 
                   1500:         and     ebx, NOT FL_FLIP_HALF   ; Make sure the next run doesn't have
                   1501:         mov     fl, ebx                 ;   to do a half-flip if it doesn't
                   1502:                                         ;   want to
                   1503:         jmp     continue_complex_clipping
                   1504: 
                   1505: ;-----------------------------------------------------------------------;
                   1506: ; Switch banks for a y-major line.                                      ;
                   1507: ;-----------------------------------------------------------------------;
                   1508: 
                   1509:         public  bank_y_major
                   1510: bank_y_major:
                   1511:         mov     d_R, edx
                   1512:         mov     i, esi
                   1513:         mov     r, edi
                   1514:         mov     dN, ecx
                   1515:         sub     ebx, esi                ; Undo our offset
                   1516: 
                   1517: bank_y_output_strips:
                   1518:         lea     edx, [strip]
                   1519:         mov     ecx, pls
                   1520: 
                   1521:         ptrCall <dword ptr pfn>, \
                   1522:                 <edx, ecx, eax>
                   1523: 
                   1524:         mov     esi, pdsurf
                   1525:         test    fl, FL_FLIP_V
                   1526:         jz      short bank_y_down
                   1527: 
                   1528: bank_y_up:
                   1529:         mov     edi, strip.ST_pjScreen
                   1530:         sub     edi, [esi].dsurf_pvBitmapStart
                   1531:         mov     ecx, [esi].dsurf_rcl1WindowClip.yTop
                   1532:         push    ecx
                   1533:         dec     ecx                     ; we want yTop - 1 to be mapped in
                   1534: 
                   1535: ; Map in the next higher bank:
                   1536: 
                   1537:         ptrCall <dword ptr [esi].dsurf_pfnBankControl>, \
                   1538:                 <esi, ecx, JustifyBottom>; ebx, esi and edi are preserved
                   1539: 
                   1540:         pop     ecx
                   1541:         sub     ecx, [esi].dsurf_rcl1WindowClip.yTop
                   1542:                                         ; ecx = # of scans can do in bank
                   1543: 
                   1544:         add     edi, [esi].dsurf_pvBitmapStart
                   1545:         mov     strip.ST_pjScreen, edi
                   1546: 
                   1547:         mov     edx, cPelsAfterThisBank                 ; edx = cPelsAfterBank
                   1548:         lea     eax, [strip.ST_alStrips]                ; eax = plStrip
                   1549:         or      ebx, ebx                                ; ebx = cPels
                   1550:         jge     bank_y_done_partial_strip
                   1551:         jmp     short bank_y_done_switch
                   1552: 
                   1553: bank_y_down:
                   1554:         mov     edi, strip.ST_pjScreen
                   1555:         sub     edi, [esi].dsurf_pvBitmapStart
                   1556:         mov     ecx, [esi].dsurf_rcl1WindowClip.yBottom
                   1557:         push    ecx
                   1558: 
                   1559: ; Map in the next lower bank:
                   1560: 
                   1561:         ptrCall <dword ptr [esi].dsurf_pfnBankControl>, \
                   1562:                 <esi, ecx, JustifyTop>  ; ebx, esi and edi are preserved
                   1563: 
                   1564:         pop     eax
                   1565:         mov     ecx, [esi].dsurf_rcl1WindowClip.yBottom
                   1566:         sub     ecx, eax                ; ecx = # scans can do in bank
                   1567: 
                   1568:         add     edi, [esi].dsurf_pvBitmapStart
                   1569:         mov     strip.ST_pjScreen, edi
                   1570: 
                   1571:         mov     edx, cPelsAfterThisBank                 ; edx = cPelsAfterBank
                   1572:         lea     eax, [strip.ST_alStrips]                ; eax = plStrip
                   1573:         or      ebx, ebx                                ; ebx = cPels
                   1574:         jge     short bank_y_done_partial_strip
                   1575: 
                   1576: bank_y_done_switch:
                   1577: 
                   1578: ; Handle a single strip stretching over multiple banks:
                   1579: 
                   1580:         test    fl, FL_FLIP_HALF
                   1581:         jz      short bank_y_no_half_flip
                   1582: 
                   1583: ; We now have to adjust for the fact that the strip drawers always leave
                   1584: ; the state ready for the next new strip (e.g., if we're doing vertical
                   1585: ; strips, it advances pjScreen one to the right after drawing each strip).
                   1586: ; But the problem is that since we crossed a bank, we have to continue the
                   1587: ; *old* strip, so we have to undo that advance:
                   1588: 
                   1589: bank_y_half_flip:
                   1590:         ror     strip.ST_jStyleMask, 1
                   1591:         ror     strip.ST_jBitMask, 1
                   1592:         adc     strip.ST_pjScreen, 0
                   1593:         jmp     short bank_y_done_bit_adjust
                   1594: 
                   1595: bank_y_no_half_flip:
                   1596:         rol     strip.ST_jStyleMask, 1
                   1597:         rol     strip.ST_jBitMask, 1
                   1598:         sbb     strip.ST_pjScreen, 0
                   1599: 
                   1600: bank_y_done_bit_adjust:
                   1601:         mov     esi, ebx
                   1602:         neg     esi                             ; esi = # pels left in strip
                   1603: 
                   1604: ; eax = pointer to first strip entry
                   1605: ; ebx = negative esi
                   1606: ; ecx = # of pels we can put down in this window
                   1607: ; edx = # of pels remaining to do in line
                   1608: ; esi = # of pels left in strip
                   1609: 
                   1610: ; We have three special cases to check here:
                   1611: ;
                   1612: ;       1) If the strip spans the entire next window
                   1613: ;       2) This is the last strip in the line
                   1614: ;       3) Neither of the above
                   1615: 
                   1616:         cmp     edx,ecx                         ;if line shorter than bank,
                   1617:         jle     short bank_y_check_if_last_strip;  know strip doesn't span bank
                   1618: 
                   1619:         cmp     esi,ecx                         ;if line spans bank, don't have
                   1620:         jl      short bank_y_continue_strip     ;  to check if last strip
                   1621: 
                   1622: ; If ((# of pels in line > window size) && (# of pels in strip >= window size))
                   1623: ; then the strip spans this bank:
                   1624: 
                   1625:         mov     [eax], ecx
                   1626:         add     eax, 4
                   1627:         add     ebx, ecx
                   1628:         sub     edx, ecx
                   1629:         mov     cPelsAfterThisBank, edx
                   1630:         jmp     bank_y_output_strips
                   1631: 
                   1632: bank_y_check_if_last_strip:
                   1633:         cmp     esi, edx                        ;if strip is shorter than line,
                   1634:         jl      short bank_y_continue_strip     ;  we know this isn't the last
                   1635:                                                 ;  strip
                   1636: 
                   1637: ; Handle case where this is the last strip in the line and it overlaps a bank:
                   1638: 
                   1639:         mov     [eax], edx
                   1640:         add     eax, 4
                   1641:         jmp     very_final_strip
                   1642: 
                   1643: bank_y_continue_strip:
                   1644:         mov     [eax], esi
                   1645:         add     eax, 4
                   1646: 
                   1647: bank_y_done_partial_strip:
                   1648:         add     ebx, edx                ; cPels += cPelsAfterThisBank
                   1649:         sub     edx, ecx                ; cPelsAfterThisBank -= cyWindow
                   1650: 
                   1651:         jle     short bank_y_get_ready
                   1652:         sub     ebx, edx
                   1653: 
                   1654: bank_y_get_ready:
                   1655:         mov     cPelsAfterThisBank, edx
                   1656:         mov     edi, r
                   1657:         mov     edx, d_R
                   1658:         mov     ecx, dN
                   1659:         jmp     done_output_strips
                   1660: 
                   1661: ;---------------------------Private-Routine-----------------------------;
                   1662: ; do_some_styling
                   1663: ; Advances pls->LS_spNext by cStylePels and sets up the strip style state.
                   1664: ; Inputs:
                   1665: ;       eax = ptlStart.ptl_y
                   1666: ;       ebx = fl
                   1667: ;       ecx = ptlStart.ptl_x
                   1668: ; Preserves:
                   1669: ;       eax, ebx, ecx
                   1670: ; Output:
                   1671: ;       Exits to done_styling.
                   1672: ; Uses edx, esi, edi as scratch; eax/ecx are reloaded from ptlStart.
                   1673: ;-----------------------------------------------------------------------;
                   1674: 
                   1675:         public  do_some_styling
                   1676: do_some_styling:
                   1677:         mov     esi, pls
                   1678:         mov     ptlStart.ptl_x, ecx     ; saved so ecx can be restored on exit
                   1679: 
                   1680:         mov     edi, [esi].LS_spNext    ; spThis
                   1681:         mov     edx, edi
                   1682:         add     edx, cStylePels         ; spNext
                   1683: 
                   1684:         testb   ebx, FL_ALTERNATESTYLED
                   1685:         jz      short do_non_alternate_style
                   1686: 
                   1687: ; Do alternate styles:
                   1688: 
                   1689:         and     edx, 1                  ; only the low bit of spNext is kept
                   1690:         mov     [esi].LS_spNext, edx
                   1691:         testb   ebx, FL_FLIP_H
                   1692:         jz      short alternate_left_to_right
                   1693: 
                   1694:         add     ecx, edx
                   1695:         sub     ecx, x0
                   1696:         add     ecx, xStart             ; ptlStart.x + spNext - x0 + xStart + 1
                   1697:         inc     ecx
                   1698:         jmp     short compute_alternate_mask
                   1699: 
                   1700: alternate_left_to_right:
                   1701:         add     ecx, edi
                   1702:         add     ecx, x0
                   1703:         sub     ecx, xStart             ; ptlStart.x + spThis + x0 - xStart
                   1704: 
                   1705: compute_alternate_mask:
                   1706:         mov     strip.ST_jStyleMask, 55h        ; 55h = 01010101b, every other bit
                   1707:         ror     strip.ST_jStyleMask, cl         ; rotate by the count computed in cl
                   1708: 
                   1709:         mov     strip.ST_spRemaining, 1
                   1710:         mov     strip.ST_xyDensity, 1
                   1711:         mov     ecx, ptlStart.ptl_x     ; restore ecx (eax was never touched here)
                   1712:         jmp     done_styling
                   1713: 
                   1714: do_non_alternate_style:
                   1715: 
                   1716: ; For styles, we don't bother to keep the style position normalized.
                   1717: ; (we do ensure that it's positive, though).  If a figure is over 2
                   1718: ; billion pels long, we'll be a pel off in our style state (oops!).
                   1719: 
                   1720:         and     edx, 7fffffffh          ; clear the sign bit of spNext
                   1721:         mov     [esi].LS_spNext, edx
                   1722:         mov     ptlStart.ptl_y, eax     ; eax is used as scratch below; save it
                   1723:         testb   ebx, FL_MASKSTYLED
                   1724:         jz      short do_arbitrary_style
                   1725: 
                   1726: ; Do mask styles:
                   1727: 
                   1728:         mov     eax, [esi].LS_xyDensity         ; Gotta copy to strips struct
                   1729:         mov     strip.ST_xyDensity, eax
                   1730: 
                   1731:         testb   ebx, FL_FLIP_H
                   1732:         jz      short mask_left_to_right
                   1733: 
                   1734:         sub     edx, x0
                   1735:         add     edx, xStart
                   1736:         add     edx, 2                  ; edx = spNext - x0 + xStart + 2
                   1737:         mov     eax, edx
                   1738:         xor     edx, edx                ; zero high dword for the unsigned div
                   1739: 
                   1740:         mov     edi, STYLE_DENSITY
                   1741:         div     edi                     ; eax = quotient, edx = remainder
                   1742:         add     ecx, eax                ; rotate count = ptlStart.x + quotient
                   1743:         inc     edx                     ; spRemaining = remainder + 1
                   1744:         mov     eax, [esi].LS_ulStyleMaskRtoL
                   1745:         jmp     short compute_masked_mask
                   1746: 
                   1747: mask_left_to_right:
                   1748:         add     edi, x0
                   1749:         sub     edi, xStart             ; edi = spThis + x0 - xStart
                   1750:         mov     eax, edi
                   1751:         xor     edx, edx                ; zero high dword for the unsigned div
                   1752:         mov     edi, STYLE_DENSITY
                   1753:         div     edi                     ; eax = quotient, edx = remainder
                   1754:         sub     ecx, eax                ; rotate count = ptlStart.x - quotient
                   1755:         neg     edx
                   1756:         add     edx, STYLE_DENSITY      ; spRemaining = STYLE_DENSITY - remainder
                   1757:         mov     eax, [esi].LS_ulStyleMaskLtoR
                   1758: 
                   1759: compute_masked_mask:
                   1760:         mov     strip.ST_spRemaining, edx
                   1761:         ror     al, cl                  ; rotate low byte of the style mask by cl
                   1762:         mov     strip.ST_jStyleMask, al
                   1763:         mov     eax, ptlStart.ptl_y     ; restore eax and ecx for the caller
                   1764:         mov     ecx, ptlStart.ptl_x
                   1765:         jmp     done_styling
                   1766: 
                   1767: ; Do arbitrary styles:
                   1768: 
                   1769: do_arbitrary_style:
                   1770:         testb   ebx, FL_FLIP_H
                   1771:         jz      short arbitrary_left_to_right
                   1772: 
                   1773:         sub     edx, x0
                   1774:         add     edx, xStart             ; edx = spNext - x0 + xStart
                   1775:         mov     eax, edx
                   1776:         xor     edx, edx                ; zero high dword for the unsigned div
                   1777:         div     [esi].LS_spTotal
                   1778: 
                   1779:         neg     edx                     ; result is zero only if remainder was zero
                   1780:         jge     short continue_right_to_left
                   1781:         add     edx, [esi].LS_spTotal   ; edx = spTotal - remainder
                   1782:         not     eax                     ; complement quotient (low bit tested below)
                   1783: 
                   1784: continue_right_to_left:
                   1785:         mov     edi, dword ptr [esi].LS_jStartMask
                   1786:         not     edi                     ; right-to-left starts with inverted mask
                   1787:         mov     ecx, [esi].LS_aspRtoL
                   1788:         jmp     short compute_arbitrary_stuff
                   1789: 
                   1790: arbitrary_left_to_right:
                   1791:         add     edi, x0
                   1792:         sub     edi, xStart             ; edi = spThis + x0 - xStart
                   1793:         mov     eax, edi
                   1794:         xor     edx, edx                ; zero high dword for the unsigned div
                   1795:         div     [esi].LS_spTotal
                   1796:         mov     edi, dword ptr [esi].LS_jStartMask
                   1797:         mov     ecx, [esi].LS_aspLtoR
                   1798: 
                   1799: compute_arbitrary_stuff:
                   1800: ;       eax = sp / spTotal
                   1801: ;       ebx = fl
                   1802: ;       ecx = pspStart
                   1803: ;       edx = sp % spTotal
                   1804: ;       esi = pls
                   1805: ;       edi = jStyleMask
                   1806: 
                   1807:         and     eax, [esi].LS_cStyle        ; if odd length style and second run
                   1808:         and     al, 1                       ; through style array, flip the
                   1809:         jz      short odd_style_array_done  ; meaning of the elements
                   1810:         not     edi
                   1811: 
                   1812: odd_style_array_done:
                   1813:         mov     eax, [esi].LS_cStyle
                   1814:         mov     strip.ST_pspStart, ecx
                   1815:         lea     eax, [ecx + eax * 4 - 4]    ; address of last 4-byte array entry
                   1816:         mov     strip.ST_pspEnd, eax
                   1817: 
                   1818: find_psp:
                   1819:         sub     edx, [ecx]                  ; subtract successive entries from edx
                   1820:         jl      short found_psp             ;   until it goes negative
                   1821:         add     ecx, 4
                   1822:         jmp     short find_psp
                   1823: 
                   1824: found_psp:
                   1825:         mov     strip.ST_psp, ecx
                   1826:         neg     edx                         ; amount left in the entry we stopped in
                   1827:         mov     strip.ST_spRemaining, edx
                   1828: 
                   1829:         sub     ecx, strip.ST_pspStart      ; ecx = byte offset of psp in the array
                   1830:         test    ecx, 4                      ; size STYLEPOS
                   1831:         jz      short done_arbitrary
                   1832:         not     edi                         ; odd-numbered entry: invert the mask
                   1833: 
                   1834: done_arbitrary:
                   1835:         mov     dword ptr strip.ST_jStyleMask, edi
                   1836:         mov     eax, ptlStart.ptl_y         ; restore eax and ecx for the caller
                   1837:         mov     ecx, ptlStart.ptl_x
                   1838:         jmp     done_styling
                   1839: 
                   1840: ;---------------------------Private-Routine-----------------------------;
                   1841: ; do_some_clipping
                   1842: ;
                   1843: ; Inputs:
                   1844: ;       eax = garbage
                   1845: ;       ebx = fl
                   1846: ;       ecx = x0
                   1847: ;       edx = garbage
                   1848: ;       esi = x1
                   1849: ;       edi = garbage
                   1850: ;
                   1851: ; Decides whether to do simple or complex clipping.
                   1852: ; Complex: jumps to initialize_complex_clipping.  Simple: falls through.
                   1853: ;-----------------------------------------------------------------------;
                   1854: 
                   1855:         align 4
                   1856: 
                   1857:         public  do_some_clipping
                   1858: do_some_clipping:
                   1859:         testb   ebx, FL_COMPLEX_CLIP
                   1860:         jnz     initialize_complex_clipping     ; else fall into simple_clipping
                   1861: 
                   1862: ;-----------------------------------------------------------------------;
                   1863: ; simple_clipping
                   1864: ;
                   1865: ; Inputs:
                   1866: ;       ebx = fl
                   1867: ;       ecx = x0
                   1868: ;       esi = x1
                   1869: ; Output:
                   1870: ;       ebx = fl
                   1871: ;       ecx = new x0 (stack variable updated too)
                   1872: ;       esi = new x1
                   1873: ;       y0 stack variable updated
                   1874: ; Uses:
                   1875: ;       All registers
                   1876: ; Exits:
                   1877: ;       to done_clipping
                   1878: ;
                   1879: ; This routine handles clipping the line to the clip rectangle (it's
                   1880: ; faster to handle this case in the driver than to call the engine to
                   1881: ; clip for us).
                   1882: ;
                   1883: ; Fractional end-point lines complicate our lives a bit when doing
                   1884: ; clipping:
                   1885: ;
                   1886: ; 1) For styling, we must know the unclipped line's length in pels, so
                   1887: ;    that we can correctly update the styling state when the line is
                   1888: ;    clipped.  For this reason, I do clipping after doing the hard work
                   1889: ;    of figuring out which pixels are at the ends of the line (this is
                   1890: ;    wasted work if the line is not styled and is completely clipped,
                   1891: ;    but I think it's simpler this way).  Another reason is that we'll
                   1892: ;    have calculated eqGamma already, which we use for the intercept
                   1893: ;    calculations.
                   1894: ;
                   1895: ;    With the assumption that most lines will not be completely clipped
                   1896: ;    away, this strategy isn't too painful.
                   1897: ;
                   1898: ; 2) x0, y0 are not necessarily zero, where (x0, y0) is the start pel of
                   1899: ;    the line.
                   1900: ;
                   1901: ; 3) We know x0, y0 and x1, but not y1.  We haven't needed to calculate
                   1902: ;    y1 until now.  We'll need the actual value, and not an upper bound
                   1903: ;    like y1 = LFLOOR(dM) + 2 because we have to be careful when
                   1904: ;    calculating x(y) that y0 <= y <= y1, otherwise we can cause an
                   1905: ;    overflow on the divide (which, needless to say, is bad).
                   1906: ;
                   1907: ;-----------------------------------------------------------------------;
                   1908: 
                   1909:         public  simple_clipping
                   1910: simple_clipping:
                   1911:         mov     edi, prclClip           ; get pointer to normalized clip rect
                   1912:         and     ebx, FL_RECTLCLIP_MASK  ;   (it's lower-right exclusive)
                   1913: 
                   1914:         .errnz  (FL_RECTLCLIP_SHIFT - 2); ((ebx AND FL_RECTLCLIP_MASK) shr
                   1915:         .errnz  (size RECTL) - 16       ;   FL_RECTLCLIP_SHIFT) is our index
                   1916:         lea     edi, [edi + ebx*4]      ;   into the array of rectangles
                   1917: 
                   1918:         mov     edx, [edi].xRight       ; load the rect coordinates
                   1919:         mov     eax, [edi].xLeft
                   1920:         mov     ebx, [edi].yBottom
                   1921:         mov     edi, [edi].yTop
                   1922: 
                   1923: ; Translate to our origin and do some quick completely clipped tests:
                   1924: 
                   1925:         sub     edx, x                  ; edx = xRight - x
                   1926:         cmp     ecx, edx
                   1927:         jge     totally_clipped         ; totally clipped if x0 >= xRight
                   1928: 
                   1929:         sub     eax, x                  ; eax = xLeft - x
                   1930:         cmp     esi, eax
                   1931:         jl      totally_clipped         ; totally clipped if x1 < xLeft
                   1932: 
                   1933:         sub     ebx, y                  ; ebx = yBottom - y
                   1934:         cmp     y0, ebx
                   1935:         jge     totally_clipped         ; totally clipped if y0 >= yBottom
                   1936: 
                   1937:         sub     edi, y                  ; edi = yTop - y
                   1938: 
                   1939: ; Save some state:
                   1940: 
                   1941:         mov     xClipRight, edx
                   1942:         mov     xClipLeft, eax
                   1943: 
                   1944:         cmp     esi, edx                ; if (x1 >= xRight) x1 = xRight - 1
                   1945:         jl      short calculate_y1
                   1946:         lea     esi, [edx - 1]
                   1947: 
                   1948: calculate_y1:
                   1949:         mov     eax, esi                ; y1 = (x1 * dN + eqGamma) / dM
                   1950:         mul     dN                      ; edx:eax = x1 * dN
                   1951:         add     eax, eqGamma_lo
                   1952:         adc     edx, eqGamma_hi         ; 64-bit add of eqGamma
                   1953:         div     dM                      ; eax = y1
                   1954: 
                   1955:         cmp     edi, eax                ; if (yTop > y1) clipped
                   1956:         jg      short totally_clipped
                   1957: 
                   1958:         cmp     ebx, eax                ; if (yBottom > y1) know x1
                   1959:         jg      short x1_computed
                   1960: 
                   1961:         mov     eax, ebx                ; x1 = (yBottom * dM + eqBeta) / dN
                   1962:         mul     dM
                   1963:         stc                             ; CF=1: sbb pair subtracts eqGamma + 1
                   1964:         sbb     eax, eqGamma_lo
                   1965:         sbb     edx, eqGamma_hi
                   1966:         div     dN
                   1967:         mov     esi, eax                ; esi = new x1
                   1968: 
                   1969: ; At this point, we've taken care of calculating the intercepts with the
                   1970: ; right and bottom edges.  Now we work on the left and top edges:
                   1971: 
                   1972: x1_computed:
                   1973:         mov     edx, y0                 ; edx = current y0
                   1974: 
                   1975:         mov     eax, xClipLeft          ; don't have to compute y intercept
                   1976:         cmp     eax, ecx                ;   at left edge if line starts to
                   1977:         jle     short top_intercept     ;   right of left edge
                   1978: 
                   1979:         mov     ecx, eax                ; x0 = xLeft
                   1980:         mul     dN                      ; y0 = (xLeft * dN + eqGamma) / dM
                   1981:         add     eax, eqGamma_lo
                   1982:         adc     edx, eqGamma_hi
                   1983:         div     dM
                   1984: 
                   1985:         cmp     ebx, eax                ; if (yBottom <= y0) clipped
                   1986:         jle     short totally_clipped
                   1987: 
                   1988:         mov     edx, eax                ; edx = y0 at the left edge
                   1989:         mov     y0, eax
                   1990: 
                   1991: top_intercept:
                   1992:         mov     ebx, fl                 ; get ready to leave
                   1993:         mov     x0, ecx
                   1994: 
                   1995:         cmp     edi, edx                ; if (yTop <= y0) done clipping
                   1996:         jle     done_clipping
                   1997: 
                   1998:         mov     eax, edi                ; x0 = (yTop * dM + eqBeta) / dN + 1
                   1999:         mul     dM
                   2000:         stc                             ; CF=1: sbb pair subtracts eqGamma + 1
                   2001:         sbb     eax, eqGamma_lo
                   2002:         sbb     edx, eqGamma_hi
                   2003:         div     dN
                   2004:         lea     ecx, [eax + 1]          ; ecx = quotient + 1 (candidate x0)
                   2005: 
                   2006:         cmp     xClipRight, ecx         ; if (xRight <= x0) clipped
                   2007:         jle     short totally_clipped
                   2008: 
                   2009:         mov     y0, edi                 ; y0 = yTop
                   2010:         mov     x0, ecx
                   2011:         jmp     done_clipping           ; all done!
                   2012: 
                   2013: totally_clipped:
                   2014: 
                   2015: ; The line is completely clipped.  See if we have to update our style state:
                   2016: 
                   2017:         mov     ebx, fl                 ; reload fl (ebx was reused above)
                   2018:         testb   ebx, FL_STYLED
                   2019:         jz      next_line
                   2020: 
                   2021: ; Adjust our style state:
                   2022: 
                   2023:         mov     esi, pls
                   2024:         mov     eax, [esi].LS_spNext
                   2025:         add     eax, cStylePels
                   2026:         mov     [esi].LS_spNext, eax    ; spNext += cStylePels
                   2027: 
                   2028:         cmp     eax, [esi].LS_spTotal2
                   2029:         jb      next_line               ; unsigned: below spTotal2, no normalize
                   2030: 
                   2031: ; Have to normalize first:
                   2032: 
                   2033:         xor     edx, edx                ; zero high dword for the unsigned div
                   2034:         div     [esi].LS_spTotal2
                   2035:         mov     [esi].LS_spNext, edx    ; spNext = spNext mod spTotal2
                   2036: 
                   2037:         jmp     next_line
                   2038: 
                   2039: ;-----------------------------------------------------------------------;
                   2040: ; Saves dN in dN_Original, then falls into continue_complex_clipping.
                   2041: initialize_complex_clipping:
                   2042:         mov     eax, dN                 ; save a copy of original dN
                   2043:         mov     dN_Original, eax
                   2044: 
                   2045: ;---------------------------Private-Routine-----------------------------;
                   2046: ; continue_complex_clipping
                   2047: ;
                   2048: ; Inputs:
                   2049: ;       ebx = fl
                   2050: ; Output:
                   2051: ;       ebx = fl
                   2052: ;       ecx = x0
                   2053: ;       esi = x1
                   2054: ; Uses:
                   2055: ;       All registers.
                   2056: ; Exits:
                   2057: ;       to done_clipping
                   2058: ;
                   2059: ; This routine handles the necessary initialization for the next
                   2060: ; run in the CLIPLINE structure.
                   2061: ;
                   2062: ; NOTE: This routine is jumped to from two places!
                   2063: ;-----------------------------------------------------------------------;
                   2064: 
                   2065:         public  continue_complex_clipping
                   2066: continue_complex_clipping:
                   2067:         mov     edi, prun               ; edi = pointer to the current run
                   2068:         mov     ecx, xStart
                   2069:         testb   ebx, FL_FLIP_H
                   2070:         jz      short complex_left_to_right
                   2071: 
                   2072: complex_right_to_left:
                   2073: 
                   2074: ; Figure out x0 and x1 for right-to-left lines:
                   2075: 
                   2076:         add     ecx, cStylePels
                   2077:         dec     ecx
                   2078:         mov     esi, ecx                ; esi = ecx = xStart + cStylePels - 1
                   2079:         sub     ecx, [edi].RUN_iStop    ; New x0
                   2080:         sub     esi, [edi].RUN_iStart   ; New x1
                   2081:         jmp     short complex_reset_variables
                   2082: 
                   2083: complex_left_to_right:
                   2084: 
                   2085: ; Figure out x0 and x1 for left-to-right lines:
                   2086: 
                   2087:         mov     esi, ecx                ; esi = ecx = xStart
                   2088:         add     ecx, [edi].RUN_iStart   ; New x0
                   2089:         add     esi, [edi].RUN_iStop    ; New x1
                   2090: 
                   2091: complex_reset_variables:
                   2092:         mov     x0, ecx                 ; update the x0 stack variable
                   2093: 
                   2094: ; The half flip mucks with some of our variables, and we have to reset
                   2095: ; them every pass.  We would have to reset eqGamma too, but it never
                   2096: ; got saved to memory in its modified form.
                   2097: 
                   2098:         add     edi, size RUN
                   2099:         mov     prun, edi               ; Increment run pointer for next time
                   2100: 
                   2101:         mov     edi, pls
                   2102:         mov     eax, [edi].LS_spComplex
                   2103:         mov     [edi].LS_spNext, eax    ; pls->spNext = pls->spComplex
                   2104: 
                   2105:         mov     eax, dN_Original        ; dN = dN_Original
                   2106:         mov     dN, eax
                   2107: 
                   2108:         mul     ecx                     ; edx:eax = dN * x0 (ecx unchanged)
                   2109:         add     eax, eqGamma_lo
                   2110:         adc     edx, eqGamma_hi         ; [edx:eax] = dN*x0 + eqGamma
                   2111: 
                   2112:         div     dM
                   2113:         mov     y0, eax                 ; y0 = (dN*x0 + eqGamma) / dM
                   2114:         jmp     done_clipping
                   2115: 
                   2116: endProc bLines
                   2117: 
                   2118: _TEXT$03   ends
                   2119: 
                   2120:         end
unix.superglobalmegacorp.com
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.