#include <u.h>
#include <libc.h>
#include <libg.h>
#include "/sys/include/gnot.h"
/*
 * Compiling bitblt - for each call of gbitblt, generate
 * machine code for the specific arguments passed in,
 * then execute that code as a subroutine call.
 *
 * The files bb?.h (where ? is replaced by the
 * architecture's single-letter code: v, k, etc., depending
 * on which of Tmips, Tsparc, etc., is defined) define macros
 * which define operations on an abstract machine.
 * If there is no architecture-specific file, bbc.h is used,
 * which interprets a program with the same operations as
 * the assumed abstract machine. See bbc.h for a description
 * of the machine's registers and operations, and for the
 * macros that must be defined to make a new bb?.h
 *
 * The code will work for bitblts to and from any bitmaps
 * with ldepths 0, 1, 2, or 3. Some of those conversions
 * may be #ifdef'd out, because no Plan 9 code currently
 * uses them, and the tables take up space. The converting
 * bitblts will need work if they are to work with ldepths > 3.
 *
 * This file also contains a thorough bitblt tester.
 * When TEST is defined, a main program is created to
 * try forward and backward cases for all bitblt opcodes,
 * first on single pixels, and then on parts of the middle two rows
 * of random 4-row bitmaps. If anything fails, it prints information
 * about the case that failed (or perhaps it will just die,
 * if bad code has been generated).
 * The testing program takes as arguments two numbers: the source
 * and destination ldepths (both 0 by default). A -s flag says to
 * use simpler, repeatable tests. A -i num flag says how many
 * iterations to do for each opcode.
 */

/* Bitblt cases:
 *	- bitmap overlap sometimes dictates that you go forward (f) through
 *	  the bitmaps, sometimes backwards (b)
 *	- different relative alignments of the source and destination
 *	  starting points within a word require different code:
 *	  the bit offsets may be the same (e), the source may start
 *	  later in the word (g), or the source may start earlier in the word (l)
 *	- when a row of the destination is all within one word (o), better
 *	  code can be generated
 *	- when the object machine has bitfield extraction/insertion instructions,
 *	  it is better to do < 32bit wide bitblts using them (bf)
 *	- if the source and destination bitmaps have different depths,
 *	  it is either an expansion (exp) or a contraction (con) of pixels.
 *	  These two cases aren't further differentiated into f vs. b, etc.
 */

/*
 * To calculate the potential size of the bitblt program, use the following
 * formulas:
 *  nonconverting max (bshg): X + 10L + 6S + 2F + 3E + 8LX + 3SX
 *  converting max (contracting by factor of 8, 32-bit memory accesses):
 *	X + XT + 46L + 48S + 96T + 64A + 32AX + 2F + 3E + 30LX + 48SX
 *
 * where  X = Initsd+Extrainit+Iloop+Oloop+Rts ; XT = Inittab
 *	  E = Emitop ; F = Field
 *	  L = load, fetch or store ; LX = extra if pre or post decrement
 *	  S = shift (sha or shb) ; SX = extra if OR too
 *	  T = Table ; A = Assemble ; AX = Assemblex
 */

/*
 * Categories of bitblt, one per code-generation case in gbitblt.
 * The names follow the letters introduced in the "Bitblt cases"
 * comment: f/b/o (forward, backward, one-word destination),
 * then "sh", then e/l/g (equal, less, greater source offset).
 * gbitblt turns an 'e' case into the matching 'l' or 'g' case by
 * adding Tbshl-Tbshe or Tbshg-Tbshe, so the order within each
 * triple matters.
 */
enum
{
	Tfshe = 0,	/* each of the triples must be in order e, l, g */
	Tfshl,
	Tfshg,

	Tbshe,
	Tbshl,
	Tbshg,

	Toshe,
	Toshl,
	Toshg,

	Tobf,		/* bitfield instructions (selected under HAVEBF) */

	Texp,		/* expansion: dst ldepth > src ldepth */

	Tcon,		/* contraction: dst ldepth < src ldepth */

	Tlast,		/* total number of cases */
};

#ifdef TEST
/*
 * globals used for testing
 */
int	FORCEFORW;	/* nonzero: make gbitblt choose the forward direction */
int	FORCEBAKW;	/* nonzero: make gbitblt choose the backward direction */
GBitmap	*curdm, *cursm;	/* bitmaps passed to the most recent gbitblt call */
Point	curpt;		/* dst point of the most recent gbitblt call */
Rectangle curr;		/* src rectangle of the most recent gbitblt call */
Fcode	curf;		/* function code of the most recent gbitblt call */
void	*mem;		/* start of the most recently generated program */
#endif

/*
105: * set up to compile -DT$objtype ! 106: */ ! 107: #ifdef Tmips ! 108: #include "bbv.h" ! 109: #else ! 110: #ifdef T68020 ! 111: #include "bb2.h" ! 112: #else ! 113: #ifdef Tsparc ! 114: #include "bbk.h" ! 115: #else ! 116: #ifdef T386 ! 117: #include "bb8l.h" ! 118: #else ! 119: #ifdef Thobbit ! 120: #include "bbcl.h" ! 121: #else ! 122: #include "bbc.h" ! 123: #endif ! 124: #endif ! 125: #endif ! 126: #endif ! 127: #endif ! 128: ! 129: /* ! 130: * bitblt operates a 'word' at a time. ! 131: * WBITS is the number of bits in a word ! 132: * LWBITS=log2(WBITS), ! 133: * W2L is the number of words in a long ! 134: * WMASK has bits set for the low order word of a long ! 135: * WType is a pointer to a word ! 136: * if LENDIAN is true, then left-to-right in bitmap ! 137: * means low-order-bit to high-order-bit within a word, ! 138: * otherwise it is high-order-bit to low-order-bit. ! 139: */ ! 140: #ifndef WBITS ! 141: #define WBITS 32 ! 142: #define LWBITS 5 ! 143: #define W2L 1 ! 144: #define WMASK ~0UL ! 145: typedef ulong *WType; ! 146: #endif ! 147: /* ! 148: * scrshl(v,o) shifts a word v by o bits screen-leftward ! 149: * scrshr(v,o) shifts a word v by o bits screen-rightward ! 150: * scrpix(v,i,l) gets the value of pixel i within word v when ldepth is l ! 151: * scrmask(i,l) has ones for pixel i when ldepth is l ! 152: */ ! 153: #define scrshl(v,o) (LENDIAN? ((v)>>(o)) : ((v)<<(o))) ! 154: #define scrshr(v,o) (LENDIAN? ((v)<<(o)) : ((v)>>(o))) ! 155: #define scrpix(v,i,l) (LENDIAN? (((v)>>((i)<<(l)))&((1<<(1<<(l)))-1)) : (((v)>>(32-(((i)+1)<<(l))))&((1<<(1<<(l)))-1))) ! 156: #define scrmask(i,l) (LENDIAN? (((1<<(1<<(l)))-1)<<((i)<<(l))) : (((1<<(1<<(l)))-1)<<(32-(((i)+1)<<l)))) ! 157: ! 158: void ! 159: gbitblt(GBitmap *dm, Point pt, GBitmap *sm, Rectangle r, Fcode fcode) ! 160: { ! 161: int type; /* category of bitblt: Tfshe or ... */ ! 162: int width; /* width in bits of dst */ ! 163: int height; /* height in pixels minus 1 */ ! 
164: int sh; /* left shift of src to align with dst */ ! 165: int soff; /* bit offset of src start point */ ! 166: int doff; /* bit offset of dst start point */ ! 167: int sc; /* src words used so far */ ! 168: int dc; /* dst words used so far */ ! 169: int le; /* log expansion factor */ ! 170: int sspan; /* words between scanlines in src */ ! 171: int dspan; /* words between scanlines in dst */ ! 172: int sdep; /* src ldepth */ ! 173: int ddep; /* dst ldepth */ ! 174: int onstack; /* compiling to stack arena */ ! 175: int backward; /* does bitblt have to go backwards? */ ! 176: ulong* saddr; /* addr of word in src containing start point */ ! 177: ulong* daddr; /* addr of word in dst containing start point */ ! 178: ulong lmask; /* affected pixels in leftmost dst word */ ! 179: ulong rmask; /* affected pixels in rightmost dst word */ ! 180: Type* lo; /* addr in program for beginning of outer loop */ ! 181: Type* li; /* addr in program for beginning of inner loop */ ! 182: uchar *tab; /* conversion table */ ! 183: int osiz; /* size of table entries, in bytes */ ! 184: Type* memstart; /* start of program */ ! 185: Type* p; /* next free address in program */ ! 186: Type* fi; /* pointer to beginning of instrs for Rs f= Rd */ ! 187: int fin; /* number of Types to copy after fi */ ! 188: long v; /* for use in Emitop macro */ ! 189: int c; /* a count */ ! 190: int fs; /* if need to fetch source */ ! 191: int fd; /* if need to fetch dest */ ! 192: int b; /* for expansion: take b bits at a time from src */ ! 193: int db; /* number of bits yielded by table lookup */ ! 194: int dl; /* log2 number of bytes yielded by table lookup */ ! 195: int sf; /* a bit offset in src */ ! 196: int df; /* a bit offset in dst */ ! 197: int firstd; /* doing first part of dst ? */ ! 198: int firsts; /* doing first part of src ? */ ! 199: int f; /* int version of fcode */ ! 200: long tmp; /* for use by some macros */ ! 201: int sha; /* |sh|%32 */ ! 202: int shb; /* WBITS-sha, if sha!=0 */ ! 
203: int sfo; /* bit offset origin (needed if WBITS==8) */ ! 204: Fstr *pf; ! 205: Type arena[Progmaxnoconv]; /* for non-converting bitblts */ ! 206: ! 207: onstack = bbonstack(); ! 208: gbitbltclip(&dm); ! 209: ! 210: #ifdef TEST ! 211: curdm = dm; ! 212: cursm = sm; ! 213: curpt = pt; ! 214: curr = r; ! 215: curf = fcode; ! 216: #endif ! 217: ! 218: width = r.max.x - r.min.x; ! 219: if(width <= 0) ! 220: return; ! 221: height = r.max.y - r.min.y - 1; ! 222: if(height < 0) ! 223: return; ! 224: ddep = dm->ldepth; ! 225: pt.x <<= ddep; ! 226: width <<= ddep; ! 227: ! 228: sdep = sm->ldepth; ! 229: r.min.x <<= sdep; ! 230: r.max.x <<= sdep; ! 231: ! 232: dspan = dm->width * W2L; ! 233: sspan = sm->width * W2L; ! 234: ! 235: daddr = (ulong*)((WType)dm->base ! 236: + dm->zero*W2L + pt.y*dspan ! 237: + (pt.x >> LWBITS)); ! 238: saddr = (ulong*)((WType)sm->base ! 239: + sm->zero*W2L + r.min.y*sspan ! 240: + (r.min.x >> LWBITS)); ! 241: ! 242: c = doff = pt.x & (WBITS-1); ! 243: soff = r.min.x & (WBITS-1); ! 244: ! 245: pf = &fstr[(f = fcode&0xF)]; ! 246: fs = pf->fetchs; ! 247: fd = pf->fetchd; ! 248: fin = pf->n; ! 249: fi = (Type *)pf->instr; ! 250: ! 251: if(ddep == sdep || !fs) { ! 252: #ifdef TEST ! 253: if(!FORCEBAKW && ! 254: (FORCEFORW || sm != dm || !fs || saddr > daddr || ! 255: (saddr == daddr && soff > doff))) ! 256: backward = 0; ! 257: else ! 258: backward = 1; ! 259: #else ! 260: if(sm != dm || !fs || saddr > daddr || ! 261: (saddr == daddr && soff > doff)) ! 262: backward = 0; ! 263: else ! 264: backward = 1; ! 265: #endif ! 266: #ifdef HAVEBF ! 267: if(width <= WBITS) { ! 268: sh = 0; ! 269: type = Tobf; ! 270: if(backward) { ! 271: daddr = (ulong *)((WType)daddr + height*dspan); ! 272: saddr = (ulong *)((WType)saddr + height*sspan); ! 273: } ! 274: goto init; ! 275: } ! 276: #else ! 277: if(doff+width <= WBITS) { ! 278: type = Toshe; ! 279: if(backward) { ! 280: daddr = (ulong *)((WType)daddr + height*dspan); ! 
281: saddr = (ulong *)((WType)saddr + height*sspan); ! 282: } ! 283: } ! 284: #endif ! 285: else { ! 286: if(!backward) ! 287: type = Tfshe; ! 288: else { ! 289: type = Tbshe; ! 290: doff = (WBITS-(doff+width)) & (WBITS-1); ! 291: soff = (WBITS-(soff+width)) & (WBITS-1); ! 292: daddr = (ulong*)((WType)dm->base ! 293: + dm->zero*W2L + (pt.y+height)*dspan ! 294: + ((pt.x + width+(WBITS-1))>>LWBITS)); ! 295: saddr = (ulong*)((WType)sm->base ! 296: + sm->zero*W2L + (r.max.y-1)*sspan ! 297: + ((r.max.x + (WBITS-1))>>LWBITS)); ! 298: } ! 299: } ! 300: if(fs) { ! 301: if((sh = soff - doff) != 0) { ! 302: if(sh < 0) ! 303: type += Tbshl-Tbshe; ! 304: else ! 305: type += Tbshg-Tbshe; ! 306: } ! 307: } else ! 308: sh = 0; ! 309: } else { ! 310: if(sdep < 0 || sdep > 3 || ! 311: ddep < 0 || ddep > 3 || ! 312: (tab = tabs[sdep][ddep]) == 0) ! 313: return; /* sorry, conversion not enabled */ ! 314: ! 315: osiz = tabosiz[sdep][ddep]; ! 316: le = ddep - sdep; ! 317: if(le > 0) { ! 318: type = Texp; ! 319: sh = soff - (doff >> le); ! 320: } else { ! 321: type = Tcon; ! 322: sh = soff - (doff << -le); ! 323: } ! 324: onstack = 0; ! 325: backward = 0; ! 326: } ! 327: ! 328: /* c has original doff (relative to beginning) */ ! 329: lmask = scrshr(WMASK,c); ! 330: rmask = scrshl(WMASK,(WBITS - ((c+width) & (WBITS-1))))&WMASK; ! 331: if(!rmask) ! 332: rmask = WMASK; ! 333: if(sh != 0) { ! 334: if(sh > 0) ! 335: sha = sh; ! 336: else ! 337: sha = (-sh)&(WBITS-1); ! 338: shb = WBITS - sha; ! 339: } ! 340: ! 341: /* init: set up constant regs and outer loop */ ! 342: init: ! 343: if(onstack) ! 344: memstart = arena; ! 345: else ! 346: memstart = (Type*)bbmalloc(Progmax * sizeof(Type)); ! 347: p = memstart; ! 348: Initsd(saddr,daddr); ! 349: if(sh) { ! 350: Initsh(sha,shb); ! 351: } ! 352: Extrainit; ! 353: ! 354: if(height > 0) { ! 355: Olabel(height+1); ! 356: lo = p; ! 357: } ! 358: sc = 0; ! 359: dc = 0; ! 360: ! 361: /* emit inner loop */ ! 362: switch(type){ ! 363: #ifdef HAVEBF ! 
364: case Tobf: ! 365: if(fd) { ! 366: Bfextu_RdAd(doff,width); ! 367: } ! 368: if(fs) { ! 369: Bfextu_RsAs(soff,width); ! 370: } ! 371: Emitop; ! 372: Bfins_AdRs(doff,width); ! 373: break; ! 374: #else ! 375: case Toshe: ! 376: /* one word dest, src and dest offsets same (or src not involved) */ ! 377: lmask &= rmask; ! 378: if(fs) { ! 379: Load_Rs(0); ! 380: } ! 381: Fetch_Rd(1); ! 382: Emitop; ! 383: Ofield(lmask); ! 384: Store_Rs; ! 385: break; ! 386: ! 387: case Toshl: ! 388: /* one word dest, src offset less than dest offset */ ! 389: lmask &= rmask; ! 390: Loadzx_Rt(0); ! 391: Fetch_Rd(0); ! 392: Orsha_RsRt; ! 393: Emitop; ! 394: Ofield(lmask); ! 395: Store_Rs; ! 396: break; ! 397: ! 398: case Toshg: ! 399: /* one word dest, src offset greater than dest offset */ ! 400: lmask &= rmask; ! 401: if(sha+doff+width > WBITS) { ! 402: Load_Rt_P; ! 403: Olsha_RsRt; ! 404: Loadzx_Rt(0); ! 405: Fetch_Rd(0); ! 406: Oorrshb_RsRt; ! 407: if(backward) ! 408: sc--; ! 409: else ! 410: sc++; ! 411: } else { ! 412: Load_Rt(0); ! 413: Fetch_Rd(0); ! 414: Olsha_RsRt; ! 415: } ! 416: Emitop; ! 417: Ofield(lmask); ! 418: Store_Rs; ! 419: break; ! 420: #endif /* HAVEBF */ ! 421: ! 422: case Tfshe: ! 423: /* forward, src and dest offsets same (or src not involved) */ ! 424: Fetch_Rd(0); ! 425: if(fs) { ! 426: Load_Rs_P; ! 427: sc++; ! 428: } else { ! 429: Nop; ! 430: } ! 431: Emitop; ! 432: Ofield(lmask); ! 433: Store_Rs_P; ! 434: dc++; ! 435: width -= WBITS - doff; ! 436: ! 437: c = width >> LWBITS; ! 438: if(c) { ! 439: if(f == Zero || f == F) { ! 440: /* set up Rs outside loop */ ! 441: Emitop; ! 442: } ! 443: li = 0; ! 444: if(c > 1) { ! 445: Ilabel(c); ! 446: li = p; ! 447: } ! 448: if(fd) { ! 449: Fetch_Rd(!fs); ! 450: } ! 451: if(fs) { ! 452: Load_Rs_P; ! 453: sc += c; ! 454: } ! 455: if(!(f == Zero || f == F)) { ! 456: Emitop; ! 457: } ! 458: Store_Rs_P; ! 459: dc += c; ! 460: if(c > 1) { ! 461: Iloop(li); ! 462: } ! 463: } ! 464: ! 465: if(width & (WBITS-1)) { ! 
466: if(fs) { ! 467: Load_Rs(0); ! 468: } ! 469: Fetch_Rd(1); ! 470: Emitop; ! 471: Ofield(rmask); ! 472: Store_Rs; ! 473: } ! 474: break; ! 475: ! 476: case Tfshl: ! 477: /* forward, src offset less than dest offset */ ! 478: Loadzx_Rt_P; ! 479: Fetch_Rd(0); ! 480: Orsha_RsRt; ! 481: sc++; ! 482: Emitop; ! 483: Ofield(lmask); ! 484: Store_Rs_P; ! 485: dc++; ! 486: width -= WBITS - doff; ! 487: ! 488: c = width >> LWBITS; ! 489: if(c) { ! 490: li = 0; ! 491: if(c > 1) { ! 492: Ilabel(c); ! 493: li = p; ! 494: } ! 495: Olshb_RsRt; ! 496: Loadzx_Rt_P; ! 497: if(fd) { ! 498: Fetch_Rd(0); ! 499: } ! 500: Oorrsha_RsRt; ! 501: sc += c; ! 502: Emitop; ! 503: Store_Rs_P; ! 504: dc += c; ! 505: if(c > 1) { ! 506: Iloop(li); ! 507: } ! 508: } ! 509: ! 510: width &= (WBITS-1); ! 511: if(width) { ! 512: Olshb_RsRt; ! 513: if(width > sha) { ! 514: Loadzx_Rt(0); ! 515: Fetch_Rd(0); ! 516: Oorrsha_RsRt; ! 517: } else { ! 518: Fetch_Rd(1); ! 519: } ! 520: Emitop; ! 521: Ofield(rmask); ! 522: Store_Rs; ! 523: } ! 524: break; ! 525: ! 526: case Tfshg: ! 527: /* forward, src offset greater than dest offset */ ! 528: Load_Rt_P; ! 529: Olsha_RsRt; ! 530: Loadzx_Rt_P; ! 531: Fetch_Rd(0); ! 532: Oorrshb_RsRt; ! 533: sc += 2; ! 534: Emitop; ! 535: Ofield(lmask); ! 536: Store_Rs_P; ! 537: dc++; ! 538: width -= WBITS - doff; ! 539: ! 540: c = width >> LWBITS; ! 541: if(c) { ! 542: li = 0; ! 543: if(c > 1) { ! 544: Ilabel(c); ! 545: li = p; ! 546: } ! 547: Olsha_RsRt; ! 548: Loadzx_Rt_P; ! 549: if(fd) { ! 550: Fetch_Rd(0); ! 551: } ! 552: Oorrshb_RsRt; ! 553: sc += c; ! 554: Emitop; ! 555: Store_Rs_P; ! 556: dc += c; ! 557: if(c > 1) { ! 558: Iloop(li); ! 559: } ! 560: } ! 561: ! 562: width &= WBITS-1; ! 563: if(width) { ! 564: Olsha_RsRt; ! 565: if(width > shb) { ! 566: Loadzx_Rt(0); ! 567: Fetch_Rd(0); ! 568: Oorrshb_RsRt; ! 569: } else { ! 570: Fetch_Rd(1); ! 571: } ! 572: Emitop; ! 573: Ofield(rmask); ! 574: Store_Rs; ! 575: } ! 576: break; ! 577: ! 578: case Tbshe: ! 
579: /* backward, src and dest offsets same (or src not involved) */ ! 580: Load_Rs_D(0); ! 581: sc++; ! 582: Fetch_Rd_D(1); ! 583: Emitop; ! 584: Ofield(rmask); ! 585: Store_Rs; ! 586: dc++; ! 587: width -= WBITS - doff; ! 588: ! 589: c = width >> LWBITS; ! 590: if(c) { ! 591: li = 0; ! 592: if(c > 1) { ! 593: Ilabel(c); ! 594: li = p; ! 595: } ! 596: Load_Rs_D(0); ! 597: sc += c; ! 598: if(fd) { ! 599: Fetch_Rd_D(1); ! 600: Emitop; ! 601: Store_Rs; ! 602: } else { ! 603: Nop; ! 604: Emitop; ! 605: Store_Rs_D; ! 606: } ! 607: dc += c; ! 608: if(c > 1) { ! 609: Iloop(li); ! 610: } ! 611: } ! 612: ! 613: if(width & (WBITS-1)) { ! 614: Load_Rs_D(0); ! 615: sc++; ! 616: Fetch_Rd_D(1); ! 617: dc++; ! 618: Emitop; ! 619: Ofield(lmask); ! 620: Store_Rs; ! 621: } ! 622: break; ! 623: ! 624: case Tbshl: ! 625: /* backward, src offset less than dest offset */ ! 626: Loadzx_Rt_D(0); ! 627: Fetch_Rd_D(0); ! 628: Olsha_RsRt; ! 629: sc++; ! 630: Emitop; ! 631: Ofield(rmask); ! 632: Store_Rs; ! 633: dc++; ! 634: width -= WBITS - doff; ! 635: ! 636: c = width >> LWBITS; ! 637: if(c) { ! 638: li = 0; ! 639: if(c > 1) { ! 640: Ilabel(c); ! 641: li = p; ! 642: } ! 643: Orshb_RsRt; ! 644: Loadzx_Rt_D(0); ! 645: if(fd) { ! 646: Fetch_Rd_D(0); ! 647: } else { ! 648: Nop; ! 649: } ! 650: Oorlsha_RsRt; ! 651: sc += c; ! 652: Emitop; ! 653: if(fd) { ! 654: Store_Rs; ! 655: } else { ! 656: Store_Rs_D; ! 657: } ! 658: dc += c; ! 659: if(c > 1) { ! 660: Iloop(li); ! 661: } ! 662: } ! 663: ! 664: width &= (WBITS-1); ! 665: if(width) { ! 666: Orshb_RsRt; ! 667: if(width > sha) { ! 668: Load_Rt_D(0); ! 669: Fetch_Rd_D(0); ! 670: Oorlsha_RsRt; ! 671: sc++; ! 672: } else { ! 673: Fetch_Rd_D(1); ! 674: } ! 675: dc++; ! 676: Emitop; ! 677: Ofield(lmask); ! 678: Store_Rs; ! 679: } ! 680: break; ! 681: ! 682: case Tbshg: ! 683: /* backward, src offset greater than dest offset */ ! 684: Loadzx_Rt_D(0); ! 685: Fetch_Rd_D(0); ! 686: Orsha_RsRt; ! 687: Loadzx_Rt_D(1); ! 688: Oorlshb_RsRt; ! 
689: sc += 2; ! 690: Emitop; ! 691: Ofield(rmask); ! 692: Store_Rs; ! 693: dc++; ! 694: width -= WBITS - doff; ! 695: ! 696: c = width >> LWBITS; ! 697: if(c) { ! 698: li = 0; ! 699: if(c > 1) { ! 700: Ilabel(c); ! 701: li = p; ! 702: } ! 703: Orsha_RsRt; ! 704: Loadzx_Rt_D(0); ! 705: if(fd) { ! 706: Fetch_Rd_D(0); ! 707: } else { ! 708: Nop; ! 709: } ! 710: Oorlshb_RsRt; ! 711: sc += c; ! 712: Emitop; ! 713: if(fd) { ! 714: Store_Rs; ! 715: } else { ! 716: Store_Rs_D; ! 717: } ! 718: dc += c; ! 719: if(c > 1) { ! 720: Iloop(li); ! 721: } ! 722: } ! 723: ! 724: width &= WBITS-1; ! 725: if(width) { ! 726: Orsha_RsRt; ! 727: if(width > shb) { ! 728: Loadzx_Rt_D(0); ! 729: Fetch_Rd_D(0); ! 730: sc++; ! 731: } else { ! 732: Fetch_Rd_D(1); ! 733: } ! 734: dc++; ! 735: Oorlshb_RsRt; ! 736: Emitop; ! 737: Ofield(lmask); ! 738: Store_Rs; ! 739: } ! 740: break; ! 741: ! 742: case Texp: ! 743: /* expansion: dest ldepth > src ldepth */ ! 744: if(WBITS == 8) { ! 745: b = 8 >> le; ! 746: /* db == 8, dl == 0 */ ! 747: } else { ! 748: b = (le <= 2) ? 8 : 32 / (1 << le); ! 749: db = b << le; ! 750: dl = (le <= 2) ? le : 2; ! 751: } ! 752: Inittab(tab,osiz); ! 753: ! 754: /* ! 755: * method: ! 756: * load the source a word at a time, into Rt; ! 757: * (if there is a shift, use Ru to hold next or last partial word, ! 758: * or, if WBITS == 8, it is <<8 in Rt) ! 759: * take b bits at a time from source and convert via table into Rd ! 760: * (each table lookup yields db bits, in 1<<dl bytes); ! 761: * assemble into Rs until have WBITS bits; ! 762: * fetch dest word into Rd, operate into Rs, store in dest ! 763: * ! 764: * this code needs reworking for expansion factor > 8 ! 765: */ ! 766: ! 767: if(WBITS == 8) { ! 768: if(sh == 0) { ! 769: Load_Rt_P; ! 770: sfo = 24; ! 771: } else if(sh > 0) { ! 772: Load_Rt_P; ! 773: Olsh_RtRt(8); ! 774: if((doff+width)>>le > shb) { ! 775: Loador_Rt_P; ! 776: sc++; ! 777: } ! 778: /* relevant source bits: Rt[16+sh..23+sh] */ ! 779: sfo = 16 + sh; ! 
780: } else { ! 781: Load_Rt_P; ! 782: /* relevant source bits: Rt[16+(8+sh)..23+(8+sh) */ ! 783: sfo = 24 + sh; ! 784: } ! 785: sf = 0; ! 786: firstd = 1; ! 787: } else { ! 788: if(sh == 0) { ! 789: Load_Rt_P; ! 790: } else if(sh > 0) { ! 791: Load_Rt_P; ! 792: ! 793: Olsha_RtRt; ! 794: if((doff+width)>>le > shb) { ! 795: Load_Ru_P; ! 796: Oorrshb_RtRu; ! 797: sc++; ! 798: } ! 799: } else { ! 800: Load_Ru_P; ! 801: Orsha_RtRu; ! 802: } ! 803: sf = (soff - sh) & ~(b-1); ! 804: firstd = 1; ! 805: firsts = 1; ! 806: } ! 807: sc++; ! 808: while(sf < WBITS && width > 0) { ! 809: if(WBITS == 8) { ! 810: Table_RsRt(sf+sfo,b,0); ! 811: sf += b; ! 812: } else { ! 813: if(firstd) ! 814: df = (sf << le) & (WBITS-1); ! 815: else ! 816: df = 0; ! 817: while(df < WBITS) { ! 818: Table_RdRt(sf,b,dl); ! 819: if(df==0 || firsts) { ! 820: c = WBITS - (df + db); ! 821: Olsh_RsRd(c); ! 822: } else if(df == WBITS - db) { ! 823: Oor_RsRd; ! 824: } else { ! 825: c = WBITS - (df + db); ! 826: Oorlsh_RsRd(c); ! 827: } ! 828: sf += b; ! 829: df += db; ! 830: firsts = 0; ! 831: } ! 832: } ! 833: Fetch_Rd(1); ! 834: Emitop; ! 835: if(firstd) { ! 836: width -= WBITS - doff; ! 837: if(width > 0 && lmask != WMASK) { ! 838: Ofield(lmask); ! 839: } else if(width <= 0) { ! 840: lmask &= rmask; ! 841: Ofield(lmask); ! 842: } ! 843: } else { ! 844: width -= WBITS; ! 845: if(width < 0) { ! 846: Ofield(rmask); ! 847: } ! 848: } ! 849: Store_Rs_P; ! 850: dc++; ! 851: firstd = 0; ! 852: } ! 853: if(width <= 0) ! 854: break; ! 855: ! 856: c = width >> (LWBITS+le); ! 857: if(c) { ! 858: li = 0; ! 859: if(c > 1) { ! 860: Ilabel(c); ! 861: li = p; ! 862: } ! 863: if(WBITS == 8) { ! 864: Olsh_RtRt(8); ! 865: Loador_Rt_P; ! 866: } else { ! 867: if(sh == 0) { ! 868: Load_Rt_P; ! 869: } else if(sh > 0) { ! 870: Olsha_RtRu; ! 871: Load_Ru_P; ! 872: Oorrshb_RtRu; ! 873: } else { ! 874: Olshb_RtRu; ! 875: Load_Ru_P; ! 876: Oorrsha_RtRu; ! 877: } ! 878: } ! 879: sc += c; ! 880: for(sf = 0; sf < WBITS;) { ! 
881: if(WBITS == 8) { ! 882: Table_RsRt(sf+sfo,b,0); ! 883: sf += b; ! 884: } else { ! 885: for(df = 0; df < WBITS;) { ! 886: Table_RdRt(sf,b,dl); ! 887: Assemblex(df,db); ! 888: sf += b; ! 889: df += db; ! 890: } ! 891: } ! 892: if(fd) { ! 893: Fetch_Rd(1); ! 894: } ! 895: Emitop; ! 896: Store_Rs_P; ! 897: dc += c; ! 898: } ! 899: if(c > 1) { ! 900: Iloop(li); ! 901: } ! 902: } ! 903: width -= c << (LWBITS+le); ! 904: if(width <= 0) ! 905: break; ! 906: ! 907: if(WBITS == 8) { ! 908: if(sh == 0) { ! 909: Load_Rt_P; ! 910: sc++; ! 911: } else if(sh > 0) { ! 912: Olsh_RtRt(8); ! 913: if(width>>le > shb) { ! 914: Loador_Rt_P; ! 915: sc++; ! 916: } ! 917: } else { ! 918: Olsh_RtRt(8); ! 919: if(width>>le > sha) { ! 920: Loador_Rt_P; ! 921: sc++; ! 922: } ! 923: } ! 924: } else { ! 925: if(sh == 0) { ! 926: Load_Rt_P; ! 927: sc++; ! 928: } else if(sh > 0) { ! 929: Olsha_RtRu; ! 930: if(width>>le > shb) { ! 931: Load_Ru_P; ! 932: Oorrshb_RtRu; ! 933: sc++; ! 934: } ! 935: } else { ! 936: Olshb_RtRu; ! 937: if(width>>le > sha) { ! 938: Load_Ru_P; ! 939: Oorrsha_RtRu; ! 940: sc++; ! 941: } ! 942: } ! 943: } ! 944: for(sf = 0; sf < WBITS && width > 0; ) { ! 945: if(WBITS == 8) { ! 946: Table_RsRt(sf+sfo,b,0); ! 947: sf += b; ! 948: } else { ! 949: for(df = 0; df < WBITS;) { ! 950: Table_RdRt(sf,b,dl); ! 951: Assemblex(df,db); ! 952: sf += b; ! 953: df += db; ! 954: } ! 955: } ! 956: Fetch_Rd(1); ! 957: Emitop; ! 958: width -= WBITS; ! 959: if(width < 0) { ! 960: Ofield(rmask); ! 961: } ! 962: Store_Rs_P; ! 963: dc++; ! 964: } ! 965: break; ! 966: ! 967: case Tcon: ! 968: /* contraction: dest ldepth < src ldepth */ ! 969: db = 8 >> -le; ! 970: Inittab(tab,osiz); ! 971: ! 972: /* ! 973: * method: ! 974: * load the source a word at a time, into Rt; ! 975: * (if there is a shift, use Ru to hold next or last partial word, ! 976: * or, if WBITS==8, it is <<8 in Rt) ! 977: * take 8 bits at a time from source and convert via table into Rd ! 
978: * (each table lookup yields db bits, in 1 byte); ! 979: * assemble into Rs until have WBITS bits (takes several src words); ! 980: * fetch dest word into Rd, operate into Rs, store in dest ! 981: * ! 982: * Something should be done to improve this code, but ! 983: * it isn't used much. ! 984: */ ! 985: ! 986: if(sh < 0) { ! 987: c = (-sh)/WBITS; ! 988: sh += c*WBITS; ! 989: if(WBITS == 8) sfo = 24 + sh; ! 990: } else { ! 991: c = 0; ! 992: if(WBITS == 8) sfo = sh ? 16 + sh : 24; ! 993: } ! 994: firstd = 1; ! 995: firsts = 1; ! 996: for(df = c*db*(4/W2L); df < WBITS && df < doff + width; ) { ! 997: c = (doff + width - df) << -le; ! 998: /* c = number of source bits needed to fill rest */ ! 999: if(WBITS == 8) { ! 1000: if(sh == 0) { ! 1001: Load_Rt_P; ! 1002: sc++; ! 1003: } else if(sh > 0) { ! 1004: if(firsts) { ! 1005: Load_Rt_P; ! 1006: Olsh_RtRt(8); ! 1007: sc++; ! 1008: } else { ! 1009: Olsh_RtRt(8); ! 1010: } ! 1011: if(shb < c) { ! 1012: Loador_Rt_P; ! 1013: sc++; ! 1014: } ! 1015: } else { ! 1016: if(firsts) { ! 1017: Load_Rt_P; ! 1018: sc++; ! 1019: } else { ! 1020: Olsh_RtRt(8); ! 1021: if(sha < c) { ! 1022: Loador_Rt_P; ! 1023: sc++; ! 1024: } ! 1025: } ! 1026: } ! 1027: } else { ! 1028: if(sh == 0) { ! 1029: Load_Rt_P; ! 1030: sc++; ! 1031: } else if(sh > 0) { ! 1032: if(firsts) { ! 1033: Load_Rt_P; ! 1034: Olsha_RtRt; ! 1035: sc++; ! 1036: } else { ! 1037: Olsha_RtRu; ! 1038: } ! 1039: if(shb < c) { ! 1040: Load_Ru_P; ! 1041: sc++; ! 1042: Oorrshb_RtRu; ! 1043: } ! 1044: } else { ! 1045: if(!firsts) { ! 1046: Olshb_RtRu; ! 1047: } ! 1048: if(sha < c) { ! 1049: Load_Ru_P; ! 1050: sc++; ! 1051: if(firsts) { ! 1052: Orsha_RtRu; ! 1053: } else { ! 1054: Oorrsha_RtRu; ! 1055: } ! 1056: } ! 1057: } ! 1058: } ! 1059: firsts = 0; ! 1060: if(WBITS == 8) { ! 1061: Table_RdRt(sfo,8,0); ! 1062: if(firstd) { ! 1063: Olsh_RsRd(8-(df+db)); ! 1064: } else { ! 1065: Assemble(df,db); ! 1066: } ! 1067: df += db; ! 1068: firstd = 0; ! 1069: } else { ! 
1070: for(sf = 0; sf < WBITS; ) { ! 1071: Table_RdRt(sf,8,0); ! 1072: if(firstd) { ! 1073: c = WBITS-(df+db); ! 1074: Olsh_RsRd(c); ! 1075: } else { ! 1076: Assemble(df,db); ! 1077: } ! 1078: df += db; ! 1079: sf += 8; ! 1080: firstd = 0; ! 1081: } ! 1082: } ! 1083: } ! 1084: Fetch_Rd(1); ! 1085: Emitop; ! 1086: width -= WBITS - doff; ! 1087: if(width > 0 && lmask != WMASK) { ! 1088: Ofield(lmask); ! 1089: } else if(width <= 0) { ! 1090: lmask &= rmask; ! 1091: Ofield(lmask); ! 1092: } ! 1093: Store_Rs_P; ! 1094: dc++; ! 1095: if(width <= 0) ! 1096: break; ! 1097: ! 1098: c = width >> LWBITS; ! 1099: if(c) { ! 1100: li = 0; ! 1101: if(c > 1) { ! 1102: Ilabel(c); ! 1103: li = p; ! 1104: } ! 1105: for(df = 0; df < WBITS; ) { ! 1106: if(WBITS == 8) { ! 1107: if(sh == 0) { ! 1108: Load_Rt_P; ! 1109: } else { ! 1110: Olsh_RtRt(8); ! 1111: Loador_Rt_P; ! 1112: } ! 1113: sc += c; ! 1114: Table_RdRt(sfo,8,0); ! 1115: Assemble(df,db); ! 1116: df += db; ! 1117: } else { ! 1118: if(sh == 0) { ! 1119: Load_Rt_P; ! 1120: } else if(sh > 0) { ! 1121: Olsha_RtRu; ! 1122: Load_Ru_P; ! 1123: Oorrshb_RtRu; ! 1124: } else { ! 1125: Olshb_RtRu; ! 1126: Load_Ru_P; ! 1127: Oorrsha_RtRu; ! 1128: } ! 1129: sc += c; ! 1130: for(sf = 0; sf < WBITS; ) { ! 1131: Table_RdRt(sf,8,0); ! 1132: Assemblex(df,db); ! 1133: df += db; ! 1134: sf += 8; ! 1135: } ! 1136: } ! 1137: } ! 1138: if(fd) { ! 1139: Fetch_Rd(1); ! 1140: } ! 1141: Emitop; ! 1142: Store_Rs_P; ! 1143: dc += c; ! 1144: if(c > 1) { ! 1145: Iloop(li); ! 1146: } ! 1147: } ! 1148: ! 1149: width -= c << LWBITS; ! 1150: if(width <= 0) ! 1151: break; ! 1152: ! 1153: for(df = 0; df < width; ) { ! 1154: c = (width - df) << -le; ! 1155: if(WBITS == 8) { ! 1156: if(sh == 0) { ! 1157: Load_Rt_P; ! 1158: sc++; ! 1159: } else if(sh > 0) { ! 1160: Olsh_RtRt(8); ! 1161: if(shb < c) { ! 1162: sc++; ! 1163: Loador_Rt_P; ! 1164: } ! 1165: } else { ! 1166: Olsh_RtRt(8); ! 1167: if(sha < c) { ! 1168: Loador_Rt_P; ! 1169: sc++; ! 1170: } ! 1171: } ! 
1172: Table_RdRt(sfo,8,0); ! 1173: Assemble(df,db); ! 1174: df += db; ! 1175: } else { ! 1176: if(sh == 0) { ! 1177: Load_Rt_P; ! 1178: sc++; ! 1179: } else if(sh > 0) { ! 1180: Olsha_RtRu; ! 1181: if(shb < c) { ! 1182: Load_Ru_P; ! 1183: Oorrshb_RtRu; ! 1184: sc++; ! 1185: } ! 1186: } else { ! 1187: Olshb_RtRu; ! 1188: if(sha < c) { ! 1189: Load_Ru_P; ! 1190: Oorrsha_RtRu; ! 1191: sc++; ! 1192: } ! 1193: } ! 1194: for(sf = 0; sf < 32; ) { ! 1195: Table_RdRt(sf,8,0); ! 1196: Assemble(df,db); ! 1197: df += db; ! 1198: sf += 8; ! 1199: } ! 1200: } ! 1201: } ! 1202: Fetch_Rd(1); ! 1203: Emitop; ! 1204: Ofield(rmask); ! 1205: Store_Rs_P; ! 1206: dc++; ! 1207: break; ! 1208: ! 1209: } ! 1210: ! 1211: /* finish outer loop, put in rts, and execute */ ! 1212: ! 1213: if(height > 0) { ! 1214: if(backward) ! 1215: c = (dc - dspan) * (WBITS/8); ! 1216: else ! 1217: c = (dspan - dc) * (WBITS/8); ! 1218: if(c) { ! 1219: Add_Ad(c); ! 1220: } ! 1221: if(fs) { ! 1222: if(backward) ! 1223: c = (sc - sspan) * (WBITS/8); ! 1224: else ! 1225: c = (sspan - sc) * (WBITS/8); ! 1226: if(c) { ! 1227: Add_As(c); ! 1228: } ! 1229: } ! 1230: Oloop(lo); ! 1231: } ! 1232: Orts; ! 1233: #ifdef TEST ! 1234: if(onstack && p - memstart > Progmaxnoconv) ! 1235: print("Increase Progmaxnoconv to at least %d!\n", p - memstart); ! 1236: else if(p - memstart > Progmax) ! 1237: print("Increase Progmax to at least %d!\n", p - memstart); ! 1238: mem = memstart; ! 1239: #endif ! 1240: bbexec((void*)memstart, (p-memstart)*sizeof(Type), onstack); ! 1241: } ! 1242: ! 1243: #ifdef TEST ! 1244: void prprog(void); ! 1245: GBitmap *bb1, *bb2; ! 1246: ulong *src, *dst, *xdst, *xans; ! 1247: int swds, dwds; ! 1248: long ticks; ! 1249: int timeit; ! 1250: ! 1251: #ifdef BYTEREV ! 1252: ulong ! 1253: byterev(ulong v) ! 1254: { ! 1255: return (v>>24)|((v>>8)&0x0000FF00)|((v<<8)&0x00FF0000)|(v<<24); ! 1256: } ! 1257: #endif ! 1258: #ifdef T386 ! 1259: long _clock; ! 1260: #endif ! 1261: ! 1262: long ! 
1263: func(int f, long s, int sld, long d, int dld) ! 1264: { ! 1265: long a; ! 1266: int sh, i, db, sb; ! 1267: ! 1268: db = 1 << dld; ! 1269: sb = 1 << sld; ! 1270: sh = db - sb; ! 1271: if(sh > 0) { ! 1272: a = s; ! 1273: for(i = sb; i<db; i += sb){ ! 1274: a <<= sb; ! 1275: s |= a; ! 1276: } ! 1277: } else if(sh < 0) ! 1278: s >>= -sh; ! 1279: ! 1280: switch(f){ ! 1281: case Zero: d = 0; break; ! 1282: case DnorS: d = ~(d|s); break; ! 1283: case DandnotS: d = d & ~s; break; ! 1284: case notS: d = ~s; break; ! 1285: case notDandS: d = ~d & s; break; ! 1286: case notD: d = ~d; break; ! 1287: case DxorS: d = d ^ s; break; ! 1288: case DnandS: d = ~(d&s); break; ! 1289: case DandS: d = d & s; break; ! 1290: case DxnorS: d = ~(d^s); break; ! 1291: case S: d = s; break; ! 1292: case DornotS: d = d | ~s; break; ! 1293: case D: d = d; break; ! 1294: case notDorS: d = ~d | s; break; ! 1295: case DorS: d = d | s; break; ! 1296: case F: d = ~0; break; ! 1297: } ! 1298: ! 1299: d &= ((1<<db)-1); ! 1300: return d; ! 1301: } ! 1302: ! 1303: void ! 1304: run(int fr, int to, int w, int op) ! 1305: { ! 1306: int i, j, f, t, fy, ty; ! 1307: extern long *_clock; ! 1308: ! 1309: fr += bb2->r.min.x; ! 1310: to += bb1->r.min.x; ! 1311: fy = bb2->r.min.y + 1; ! 1312: ty = bb1->r.min.y + 1; ! 1313: if(timeit) { ! 1314: memcpy(dst, xdst, dwds * sizeof(long)); ! 1315: ticks -= *_clock; ! 1316: gbitblt(bb1, Pt(to,ty), bb2, Rect(fr,fy,fr+w,fy+2), op); ! 1317: ticks += *_clock; ! 1318: return; ! 1319: } ! 1320: f = fr; ! 1321: t = to; ! 1322: memcpy(dst, xdst, dwds * sizeof(long)); ! 1323: for(i=0; i<w; i++) { ! 1324: gbitblt(bb1, Pt(t,ty), bb2, Rect(f,fy,f+1,fy+1), op); ! 1325: gbitblt(bb1, Pt(t,ty+1), bb2, Rect(f,fy+1,f+1,fy+2), op); ! 1326: f++; ! 1327: t++; ! 1328: } ! 1329: memcpy(xans, dst, dwds * sizeof(long)); ! 1330: ! 1331: memcpy(dst, xdst, dwds * sizeof(long)); ! 1332: gbitblt(bb1, Pt(to,ty), bb2, Rect(fr,fy,fr+w,fy+2), op); ! 1333: ! 
1334: if(memcmp(xans, dst, dwds * sizeof(long))) { ! 1335: /* ! 1336: * print src and dst row offset, width in bits, and forw/back ! 1337: * then print for each of the four rows: the source (s), ! 1338: * the dest (d), the good value of the answer (g), ! 1339: * and the actual bad value of the answer (b) ! 1340: */ ! 1341: print("fr=%d to=%d w=%d fb=%d%d\n", ! 1342: fr, to, w, FORCEFORW, FORCEBAKW); ! 1343: print("dst bitmap b %#lux, z %d, w %d, ld %d, r [%d,%d][%d,%d]\n", ! 1344: bb1->base, bb1->zero, bb1->width, bb1->ldepth, ! 1345: bb1->r.min.x, bb1->r.min.y, bb1->r.max.x, bb1->r.max.y); ! 1346: print("src bitmap b %#lux, z %d, w %d, ld %d, r [%d,%d][%d,%d]\n", ! 1347: bb2->base, bb2->zero, bb2->width, bb2->ldepth, ! 1348: bb2->r.min.x, bb2->r.min.y, bb2->r.max.x, bb2->r.max.y); ! 1349: for(j=0; 7*j < dwds; j++) { ! 1350: print("\ns"); ! 1351: for(i=0; i<7 && 7*j+i < dwds; i++) ! 1352: print(" %.8lux", src[7*j + i]); ! 1353: print("\nd"); ! 1354: for(i=0; i<7 && 7*j+i < dwds; i++) ! 1355: print(" %.8lux", xdst[7*j + i]); ! 1356: print("\ng"); ! 1357: for(i=0; i<7 && 7*j+i < dwds; i++) ! 1358: print(" %.8lux", xans[7*j + i]); ! 1359: print("\nb"); ! 1360: for(i=0; i<7 && 7*j+i < dwds; i++) ! 1361: print(" %.8lux", dst[7*j + i]); ! 1362: print("\n"); ! 1363: } ! 1364: prprog(); ! 1365: } ! 1366: } ! 1367: ! 1368: void ! 1369: main(int argc, char *argv[]) ! 1370: { ! 1371: int f, t, w, i, sld, dld, op, iters, simple; ! 1372: ulong s, d, spix, dpix, apix, fpix, m, *ps, *pd; ! 1373: Point sorg, dorg; ! 1374: GBitmap *bs, *bd; ! 1375: long seed; ! 1376: char *ct; ! 1377: ! 1378: sld = 0; ! 1379: dld = 0; ! 1380: timeit = 0; ! 1381: iters = 200; ! 1382: simple = 0; ! 1383: ARGBEGIN { ! 1384: case 'i': ! 1385: iters = atoi(ARGF()); ! 1386: break; ! 1387: case 's': ! 1388: simple = 1; ! 1389: break; ! 1390: case 't': ! 1391: timeit = 1; ! 1392: ct = ARGF(); ! 1393: if(ct) ! 1394: iters = atoi(ct); ! 1395: break; ! 1396: } ARGEND ! 1397: if(argc > 0) ! 
1398: sld = atoi(argv[0]); ! 1399: if(argc > 1) ! 1400: dld = atoi(argv[1]); ! 1401: if(sld < 0 || sld > 3 || dld < 0 || dld > 3 || ! 1402: (sld != dld && !tabs[sld][dld])){ ! 1403: print("conversion from ldepth %d to %d not enabled\n", ! 1404: sld, dld); ! 1405: exits(0); ! 1406: } ! 1407: if(!timeit && !simple) { ! 1408: seed = time(0); ! 1409: print("seed %lux\n", seed); srand(seed); /**/ ! 1410: } ! 1411: ! 1412: print("sld %d dld %d\n", sld, dld); ! 1413: op = 1/*Zero*/; ! 1414: ! 1415: /* bitmaps for 1-bit tests */ ! 1416: bd = gballoc(Rect(0,0,32,1), dld); ! 1417: bs = gballoc(Rect(0,0,32,1), sld); ! 1418: for(i=0; i<bs->width; i++) ! 1419: bs->base[i] = lrand(); ! 1420: ! 1421: /* bitmaps for rect tests */ ! 1422: if(simple) { ! 1423: dorg = Pt(0,0); ! 1424: sorg = Pt(0,0); ! 1425: } else { ! 1426: dorg = Pt(nrand(63)-31,nrand(63)-31); ! 1427: sorg = Pt(nrand(63)-31,nrand(63)-31); ! 1428: } ! 1429: bb1 = gballoc(Rpt(dorg,add(dorg,Pt(200,4))), dld); ! 1430: bb2 = gballoc(Rpt(sorg,add(sorg,Pt(200,4))), sld); ! 1431: dwds = bb1->width * Dy(bb1->r); ! 1432: swds = bb2->width * Dy(bb2->r); ! 1433: dst = bb1->base; ! 1434: src = bb2->base; ! 1435: xdst = malloc(dwds * sizeof(long)); ! 1436: xans = malloc(dwds * sizeof(long)); ! 1437: for(i=0; i<swds; i++) ! 1438: src[i] = lrand(); ! 1439: for(i=0; i<dwds; i++) ! 1440: xdst[i] = lrand(); ! 1441: loop: ! 1442: print("Op %d\n", op); ! 1443: if(!timeit) { ! 1444: print("one pixel\n"); ! 1445: ps = bs->base; ! 1446: pd = bd->base; ! 1447: FORCEFORW = 1; ! 1448: FORCEBAKW = 0; ! 1449: for(i=0; i<1000; i++, FORCEFORW = !FORCEFORW, FORCEBAKW = !FORCEBAKW) { ! 1450: f = nrand(32 >> sld); ! 1451: t = nrand(32 >> dld); ! 1452: s = lrand(); ! 1453: d = lrand(); ! 1454: ps[0] = s; ! 1455: pd[0] = d; ! 1456: #ifdef BYTEREV ! 1457: spix = scrpix(byterev(s),f,sld); ! 1458: dpix = scrpix(byterev(d),t,dld); ! 1459: #else ! 1460: spix = scrpix(s,f,sld); ! 1461: dpix = scrpix(d,t,dld); ! 1462: #endif ! 
1463: apix = func(op, spix, sld, dpix, dld); ! 1464: gbitblt(bd, Pt(t,0), bs, Rect(f,0,f+1,1), op); ! 1465: if(ps[0] != s) { ! 1466: print("bb src %.8lux %.8lux %d %d\n", ps[0], s, f, t); ! 1467: exits("error"); ! 1468: } ! 1469: m = scrmask(t,dld); ! 1470: #ifdef BYTEREV ! 1471: m = byterev(m); ! 1472: #endif ! 1473: if((pd[0] & ~m) != (d & ~m)) { ! 1474: print("bb dst1 %.8lux %.8lux\n", ! 1475: s, d); ! 1476: print("bb %.8lux %.8lux %d %d\n", ! 1477: ps[0], pd[0], f, t); ! 1478: prprog(); ! 1479: exits("error"); ! 1480: } ! 1481: #ifdef BYTEREV ! 1482: fpix = scrpix(byterev(pd[0]),t,dld); ! 1483: #else ! 1484: fpix = scrpix(pd[0],t,dld); ! 1485: #endif ! 1486: if(apix != fpix) { ! 1487: print("bb dst2 %.8lux %.8lux\n", ! 1488: s, d); ! 1489: print("bb %.8lux %.8lux %d %d\n", ! 1490: ps[0], pd[0], f, t); ! 1491: print("bb %.8lux %.8lux %.8lux %.8lux\n", ! 1492: spix, dpix, apix, fpix); ! 1493: prprog(); ! 1494: exits("error"); ! 1495: } ! 1496: } ! 1497: } ! 1498: ! 1499: print("for\n"); ! 1500: FORCEFORW = 1; ! 1501: FORCEBAKW = 0; ! 1502: ! 1503: for(i=0; i<iters; i++) { ! 1504: f = nrand(64); ! 1505: t = nrand(64); ! 1506: w = nrand(130); ! 1507: run(f, t, w, op); ! 1508: } ! 1509: ! 1510: if(sld == dld) { ! 1511: print("bak\n"); ! 1512: FORCEFORW = 0; ! 1513: FORCEBAKW = 1; ! 1514: ! 1515: for(i=0; i<iters; i++) { ! 1516: f = nrand(64); ! 1517: t = nrand(64); ! 1518: w = nrand(130); ! 1519: run(f, t, w, op); ! 1520: } ! 1521: } ! 1522: ! 1523: if(op < F) { ! 1524: op++; ! 1525: goto loop; ! 1526: } ! 1527: if(timeit) ! 1528: print("time: %d ticks\n", ticks); ! 1529: exits(0); ! 1530: } ! 1531: ! 1532: ! 1533: #endif
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.