File:  [Plan 9 NeXT] / lucent / sys / src / libgnot / gbitblt.c
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs
Tue Apr 24 18:01:05 2018 UTC (8 years, 1 month ago) by root
Branches: lucent, MAIN
CVS tags: plan9, HEAD
Plan 9 NeXT

#include	<u.h>
#include	<libc.h>
#include	<libg.h>
#include	"/sys/include/gnot.h"
/*
 * Compiling bitblt - for each call of gbitblt, generate
 * machine code for the specific arguments passed in,
 * then execute that code as a subroutine call.
 *
 * The files bb?.h (where ? is replaced by the
 * architecture's single-letter code: v, k, etc., depending
 * on which of Tmips, Tsparc, etc., is defined) define macros
 * which define operations on an abstract machine.
 * If there is no architecture-specific file, bbc.h is used,
 * which interprets a program with the same operations as
 * the assumed abstract machine.  See bbc.h for a description
 * of the machine's registers and operations, and for the
 * macros that must be defined to make a new bb?.h
 *
 * The code will work for bitblts to and from any bitmaps
 * with ldepths 0, 1, 2, or 3.  Some of those conversions
 * may be #ifdef'd out, because no Plan 9 code currently
 * uses them, and the tables take up space.  The converting
 * bitblts will need work if they are to work with ldepths > 3.
 *
 * This file also contains a thorough bitblt tester.
 * When TEST is defined, a main program is created to
 * try forward and backward cases for all bitblt opcodes,
 * first on single pixels, and then on parts of the middle two rows
 * of random 4-row bitmaps.  If anything fails, it prints information
 * about the case that failed (or perhaps it will just die,
 * if bad code has been generated).
 * The testing program takes as arguments two numbers: the source
 * and destination ldepths (both 0 by default).  A -s flag says to
 * use simpler, repeatable tests.  A -i num flag says how many
 * iterations to do for each opcode.
 */

/* Bitblt cases:
 * - bitmap overlap sometimes dictates that you go forward (f) through
 *   the bitmaps, sometimes backwards (b)
 * - different relative alignments of the source and destination
 *   starting points within a word require different code:
 *   the bit offsets may be the same (e), the source may start
 *   later in the word (g), or the source may start earlier in the word (l)
 * - when a row of the destination is all within one word (o), better
 *   code can be generated
 * - when the object machine has bitfield extraction/insertion instructions,
 *   it is better to do < 32bit wide bitblts using them (bf)
 * - if the source and destination bitmaps have different depths,
 *   it is either an expansion (exp) or a contraction (con) of pixels.
 *   These two cases aren't further differentiated into f vs. b, etc.
 */

/*
 * To calculate the potential size of the bitblt program, use the following
 * formulas:
 *  nonconverting max (bshg): X + 10L + 6S + 2F + 3E + 8LX + 3SX
 *  converting max (contracting by factor of 8, 32-bit memory accesses):
 *    X + XT + 46L + 48S + 96T + 64A + 32AX + 2F + 3E + 30LX + 48SX
 *
 * where X = Initsd+Extrainit+Iloop+Oloop+Rts  ; XT = Inittab
 *       E = Emitop  ;  F = Field
 *       L = load, fetch or store ; LX = extra if pre or post decrement
 *       S = shift (sha or shb)   ; SX = extra if OR too
 *       T = Table  ;  A = Assemble  ; AX = Assemblex
 */
/*
 * Categories of bitblt, used to select which code generator
 * runs in gbitblt.  Within each group of three the order must
 * stay e, l, g, because the l and g variants are reached by
 * adding a fixed offset to the e variant.
 */
enum
{
	/* forward, multi-word destination rows */
	Tfshe	= 0,
	Tfshl	= 1,
	Tfshg	= 2,

	/* backward, multi-word destination rows */
	Tbshe	= 3,
	Tbshl	= 4,
	Tbshg	= 5,

	/* destination row fits in one word */
	Toshe	= 6,
	Toshl	= 7,
	Toshg	= 8,

	/* bitfield extract/insert version */
	Tobf	= 9,

	/* pixel expansion (dest deeper than src) */
	Texp	= 10,

	/* pixel contraction (dest shallower than src) */
	Tcon	= 11,

	/* total number of cases */
	Tlast	= 12,
};

#ifdef TEST
/*
 * globals used for testing
 *
 * FORCEFORW and FORCEBAKW override gbitblt's own choice of
 * direction.  The cur* variables and mem record the arguments
 * and generated program of the most recent gbitblt call.
 */
int	FORCEFORW;	/* nonzero: go forward (unless FORCEBAKW is set) */
int	FORCEBAKW;	/* nonzero: go backward */
GBitmap	*curdm, *cursm;	/* dst and src bitmaps of the last call */
Point	curpt;		/* dst point of the last call */
Rectangle curr;		/* src rectangle of the last call */
Fcode	curf;		/* function code of the last call */
void	*mem;		/* start of the last generated program */
#endif

/*
 * set up to compile -DT$objtype
 */
#ifdef	Tmips
#include	"bbv.h"
#else
#ifdef	T68020
#include	"bb2.h"
#else
#ifdef	Tsparc
#include	"bbk.h"
#else
#ifdef	T386
#include	"bb8l.h"
#else
#ifdef	Thobbit
#include	"bbcl.h"
#else
#include	"bbc.h"
#endif
#endif
#endif
#endif
#endif

/*
 * bitblt operates a 'word' at a time.
 * WBITS is the number of bits in a word
 * LWBITS=log2(WBITS),
 * W2L is the number of words in a long
 * WMASK has bits set for the low order word of a long
 * WType is a pointer to a word
 * if LENDIAN is true, then left-to-right in bitmap
 * means low-order-bit to high-order-bit within a word,
 * otherwise it is high-order-bit to low-order-bit.
 */
/* defaults, used when the bb?.h included above did not define WBITS */
#ifndef WBITS
#define WBITS	32		/* bits per word */
#define LWBITS	5		/* log2(WBITS) */
#define	W2L	1		/* words per long */
#define WMASK	~0UL		/* low-order word of a long */
typedef ulong	*WType;		/* pointer to a word */
#endif
/*
 * scrshl(v,o) shifts a word v by o bits screen-leftward
 * scrshr(v,o) shifts a word v by o bits screen-rightward
 * scrpix(v,i,l) gets the value of pixel i within word v when ldepth is l
 * scrmask(i,l) has ones for pixel i when ldepth is l
 *
 * All four test LENDIAN where they are expanded, so it must be
 * defined by then.  scrpix and scrmask assume 32-bit words.
 * scrmask builds its mask in unsigned long so that a pixel in the
 * high-order byte does not overflow a signed int, and every use
 * of an argument is parenthesized so that expressions may be passed.
 */
#define scrshl(v,o)	(LENDIAN? ((v)>>(o)) : ((v)<<(o)))
#define scrshr(v,o)	(LENDIAN? ((v)<<(o)) : ((v)>>(o)))
#define scrpix(v,i,l)	(LENDIAN? (((v)>>((i)<<(l)))&((1<<(1<<(l)))-1)) : (((v)>>(32-(((i)+1)<<(l))))&((1<<(1<<(l)))-1)))
#define scrmask(i,l)	(LENDIAN? (((1UL<<(1<<(l)))-1)<<((i)<<(l))) : (((1UL<<(1<<(l)))-1)<<(32-(((i)+1)<<(l)))))

/*
 * gbitblt - combine rectangle r of source bitmap sm into
 * destination bitmap dm at point pt, using function fcode
 * (only the low four bits of fcode are used).
 *
 * A program for this particular call is generated into the local
 * arena or into memory obtained from bbmalloc, using the macros
 * of the selected bb?.h, and is then handed to bbexec.
 *
 * Nothing is done when the rectangle is empty or when the two
 * bitmaps have different ldepths and no conversion table exists
 * for that pair.
 *
 * NOTE(review): gbitbltclip is handed the address of the first
 * parameter; presumably it clips all of the arguments in place
 * through that address - confirm against its definition.
 */
void
gbitblt(GBitmap *dm, Point pt, GBitmap *sm, Rectangle r, Fcode fcode)
{
	int	type;		/* category of bitblt: Tfshe or ... */
	int	width;		/* width in bits of dst */
	int	height;		/* height in pixels minus 1 */
	int	sh;		/* left shift of src to align with dst */
	int	soff;		/* bit offset of src start point */
	int	doff;		/* bit offset of dst start point */
	int	sc;		/* src words used so far */
	int 	dc;		/* dst words used so far */
	int	le;		/* log expansion factor */
	int	sspan;		/* words between scanlines in src */
	int	dspan;		/* words between scanlines in dst */
	int	sdep;		/* src ldepth */
	int 	ddep;		/* dst ldepth */
	int	onstack;	/* compiling to stack arena */
	int	backward;	/* does bitblt have to go backwards? */
	ulong*	saddr;		/* addr of word in src containing start point */
	ulong*	daddr;		/* addr of word in dst containing start point */
	ulong	lmask;		/* affected pixels in leftmost dst word */
	ulong	rmask;		/* affected pixels in rightmost dst word */
	Type*	lo;		/* addr in program for beginning of outer loop */
	Type*	li;		/* addr in program for beginning of inner loop */
	uchar	*tab;		/* conversion table */
	int	osiz;		/* size of table entries, in bytes */
	Type*	memstart;	/* start of program */
	Type*	p;		/* next free address in program */
	Type*	fi;		/* pointer to beginning of instrs for Rs f= Rd */
	int	fin;		/* number of Types to copy after fi */
	long	v;		/* for use in Emitop macro */
	int	c;		/* a count */
	int	fs;		/* if need to fetch source */
	int	fd;		/* if need to fetch dest */
	int	b;		/* for expansion: take b bits at a time from src */
	int	db;		/* number of bits yielded by table lookup */
	int	dl;		/* log2 number of bytes yielded by table lookup */
	int	sf;		/* a bit offset in src */
	int 	df;		/* a bit offset in dst */
	int	firstd;		/* doing first part of dst ? */
	int	firsts;		/* doing first part of src ? */
	int	f;		/* int version of fcode */
	long	tmp;		/* for use by some macros */
	int	sha;		/* |sh|%32 */
	int	shb;		/* WBITS-sha, if sha!=0 */
	int	sfo;		/* bit offset origin (needed if WBITS==8) */
	Fstr	*pf;
	Type	arena[Progmaxnoconv];	/* for non-converting bitblts */

	onstack = bbonstack();
	gbitbltclip(&dm);

#ifdef TEST
	curdm = dm;
	cursm = sm;
	curpt = pt;
	curr = r;
	curf = fcode;
#endif

	width = r.max.x - r.min.x;
	if(width <= 0)
		return;
	height = r.max.y - r.min.y - 1;
	if(height < 0)
		return;

	/* from here on x coordinates and width are in bits, not pixels */
	ddep = dm->ldepth;
	pt.x <<= ddep;
	width <<= ddep;

	sdep = sm->ldepth;
	r.min.x <<= sdep;
	r.max.x <<= sdep;

	dspan = dm->width * W2L;
	sspan = sm->width * W2L;

	daddr = (ulong*)((WType)dm->base
			+ dm->zero*W2L + pt.y*dspan
			+ (pt.x >> LWBITS));
	saddr = (ulong*)((WType)sm->base
			+ sm->zero*W2L + r.min.y*sspan
			+ (r.min.x >> LWBITS));

	c = doff = pt.x & (WBITS-1);
	soff = r.min.x & (WBITS-1);

	pf = &fstr[(f = fcode&0xF)];
	fs = pf->fetchs;
	fd = pf->fetchd;
	fin = pf->n;
	fi = (Type *)pf->instr;

	if(ddep == sdep || !fs) {
		/*
		 * non-converting case: go backward only when source and
		 * destination are the same bitmap, the source is needed,
		 * and the source start does not lie after the destination start
		 */
#ifdef TEST
		if(!FORCEBAKW &&
		   (FORCEFORW || sm != dm || !fs || saddr > daddr ||
		    (saddr == daddr && soff > doff)))
			backward = 0;
		else
			backward = 1;
#else
		if(sm != dm || !fs || saddr > daddr ||
		  (saddr == daddr && soff > doff))
			backward = 0;
		else
			backward = 1;
#endif
#ifdef HAVEBF
		if(width <= WBITS) {
			sh = 0;
			type = Tobf;
			if(backward) {
				daddr = (ulong *)((WType)daddr + height*dspan);
				saddr = (ulong *)((WType)saddr + height*sspan);
			}
			goto init;
		}
#else
		if(doff+width <= WBITS) {
			type = Toshe;
			if(backward) {
				daddr = (ulong *)((WType)daddr + height*dspan);
				saddr = (ulong *)((WType)saddr + height*sspan);
			}
		}
#endif
		else {
			if(!backward)
				type = Tfshe;
			else {
				type = Tbshe;
				doff = (WBITS-(doff+width)) & (WBITS-1);
				soff = (WBITS-(soff+width)) & (WBITS-1);
				daddr = (ulong*)((WType)dm->base
					+ dm->zero*W2L + (pt.y+height)*dspan
					+ ((pt.x + width+(WBITS-1))>>LWBITS));
				saddr = (ulong*)((WType)sm->base
					+ sm->zero*W2L + (r.max.y-1)*sspan
					+ ((r.max.x + (WBITS-1))>>LWBITS));
			}
		}
		if(fs) {
			/* step from the e variant to the l or g variant */
			if((sh = soff - doff) != 0) {
				if(sh < 0)
					type += Tbshl-Tbshe;
				else
					type += Tbshg-Tbshe;
			}
		} else
			sh = 0;
	} else {
		if(sdep < 0 || sdep > 3 ||
		   ddep < 0 || ddep > 3 ||
		   (tab = tabs[sdep][ddep]) == 0)
			return;	/* sorry, conversion not enabled */
		
		osiz = tabosiz[sdep][ddep];
		le = ddep - sdep;
		if(le > 0) {
			type = Texp;
			sh = soff - (doff >> le);
		} else {
			type = Tcon;
			sh = soff - (doff << -le);
		}
		onstack = 0;
		backward = 0;
	}

	/* c has original doff (relative to beginning) */
	lmask = scrshr(WMASK,c);
	/*
	 * a row that ends on a word boundary affects all of its last
	 * word; test for that first so the shift count below is
	 * always less than WBITS
	 */
	if(((c+width) & (WBITS-1)) == 0)
		rmask = WMASK;
	else
		rmask = scrshl(WMASK,(WBITS - ((c+width) & (WBITS-1))))&WMASK;
	if(sh != 0) {
		if(sh > 0)
			sha = sh;
		else
			sha = (-sh)&(WBITS-1);
		shb = WBITS - sha;
	}

	/* init: set up constant regs and outer loop */
    init:
	if(onstack)
		memstart = arena;
	else
		memstart = (Type*)bbmalloc(Progmax * sizeof(Type));
	p = memstart;
	Initsd(saddr,daddr);
	if(sh) {
		Initsh(sha,shb);
	}
	Extrainit;

	if(height > 0) {
		Olabel(height+1);
		lo = p;
	}
	sc = 0;
	dc = 0;

	/* emit inner loop */
	switch(type){
#ifdef HAVEBF
	case Tobf:
		if(fd) {
			Bfextu_RdAd(doff,width);
		}
		if(fs) {
			Bfextu_RsAs(soff,width);
		}
		Emitop;
		Bfins_AdRs(doff,width);
		break;
#else
	case Toshe:
		/* one word dest, src and dest offsets same (or src not involved) */
		lmask &= rmask;
		if(fs) {
			Load_Rs(0);
		}
		Fetch_Rd(1);
		Emitop;
		Ofield(lmask);
		Store_Rs;
		break;

	case Toshl:
		/* one word dest, src offset less than dest offset */
		lmask &= rmask;
		Loadzx_Rt(0);
		Fetch_Rd(0);
		Orsha_RsRt;
		Emitop;
		Ofield(lmask);
		Store_Rs;
		break;

	case Toshg:
		/* one word dest, src offset greater than dest offset */
		lmask &= rmask;
		if(sha+doff+width > WBITS) {
			Load_Rt_P;
			Olsha_RsRt;
			Loadzx_Rt(0);
			Fetch_Rd(0);
			Oorrshb_RsRt;
			if(backward)
				sc--;
			else
				sc++;
		} else {
			Load_Rt(0);
			Fetch_Rd(0);
			Olsha_RsRt;
		}
		Emitop;
		Ofield(lmask);
		Store_Rs;
		break;
#endif /* HAVEBF */

	case Tfshe:
		/* forward, src and dest offsets same (or src not involved) */
		Fetch_Rd(0);
		if(fs) {
			Load_Rs_P;
			sc++;
		} else {
			Nop;
		}
		Emitop;
		Ofield(lmask);
		Store_Rs_P;
		dc++;
		width -= WBITS - doff;
	
		c = width >> LWBITS;
		if(c) {
			if(f == Zero || f == F) {
				/* set up Rs outside loop */
				Emitop;
			}
			li = 0;
			if(c > 1) {
				Ilabel(c);
				li = p;
			}
			if(fd) {
				Fetch_Rd(!fs);
			}
			if(fs) {
				Load_Rs_P;
				sc += c;
			}
			if(!(f == Zero || f == F)) {
				Emitop;
			}
			Store_Rs_P;
			dc += c;
			if(c > 1) {
				Iloop(li);
			}
		}
	
		if(width & (WBITS-1)) {
			if(fs) {
				Load_Rs(0);
			}
			Fetch_Rd(1);
			Emitop;
			Ofield(rmask);
			Store_Rs;
		}
		break;

	case Tfshl:
		/* forward, src offset less than dest offset */
		Loadzx_Rt_P;
		Fetch_Rd(0);
		Orsha_RsRt;
		sc++;
		Emitop;
		Ofield(lmask);
		Store_Rs_P;
		dc++;
		width -= WBITS - doff;
	
		c = width >> LWBITS;
		if(c) {
			li = 0;
			if(c > 1) {
				Ilabel(c);
				li = p;
			}
			Olshb_RsRt;
			Loadzx_Rt_P;
			if(fd) {
				Fetch_Rd(0);
			}
			Oorrsha_RsRt;
			sc += c;
			Emitop;
			Store_Rs_P;
			dc += c;
			if(c > 1) {
				Iloop(li);
			}
		}
	
		width &= (WBITS-1);
		if(width) {
			Olshb_RsRt;
			if(width > sha) {
				Loadzx_Rt(0);
				Fetch_Rd(0);
				Oorrsha_RsRt;
			} else {
				Fetch_Rd(1);
			}
			Emitop;
			Ofield(rmask);
			Store_Rs;
		}
		break;

	case Tfshg:
		/* forward, src offset greater than dest offset */
		Load_Rt_P;
		Olsha_RsRt;
		Loadzx_Rt_P;
		Fetch_Rd(0);
		Oorrshb_RsRt;
		sc += 2;
		Emitop;
		Ofield(lmask);
		Store_Rs_P;
		dc++;
		width -= WBITS - doff;
	
		c = width >> LWBITS;
		if(c) {
			li = 0;
			if(c > 1) {
				Ilabel(c);
				li = p;
			}
			Olsha_RsRt;
			Loadzx_Rt_P;
			if(fd) {
				Fetch_Rd(0);
			}
			Oorrshb_RsRt;
			sc += c;
			Emitop;
			Store_Rs_P;
			dc += c;
			if(c > 1) {
				Iloop(li);
			}
		}
	
		width &= WBITS-1;
		if(width) {
			Olsha_RsRt;
			if(width > shb) {
				Loadzx_Rt(0);
				Fetch_Rd(0);
				Oorrshb_RsRt;
			} else {
				Fetch_Rd(1);
			}
			Emitop;
			Ofield(rmask);
			Store_Rs;
		}
		break;

	case Tbshe:
		/* backward, src and dest offsets same (or src not involved) */
		Load_Rs_D(0);
		sc++;
		Fetch_Rd_D(1);
		Emitop;
		Ofield(rmask);
		Store_Rs;
		dc++;
		width -= WBITS - doff;
	
		c = width >> LWBITS;
		if(c) {
			li = 0;
			if(c > 1) {
				Ilabel(c);
				li = p;
			}
			Load_Rs_D(0);
			sc += c;
			if(fd) {
				Fetch_Rd_D(1);
				Emitop;
				Store_Rs;
			} else {
				Nop;
				Emitop;
				Store_Rs_D;
			}
			dc += c;
			if(c > 1) {
				Iloop(li);
			}
		}
	
		if(width & (WBITS-1)) {
			Load_Rs_D(0);
			sc++;
			Fetch_Rd_D(1);
			dc++;
			Emitop;
			Ofield(lmask);
			Store_Rs;
		}
		break;

	case Tbshl:
		/* backward, src offset less than dest offset */
		Loadzx_Rt_D(0);
		Fetch_Rd_D(0);
		Olsha_RsRt;
		sc++;
		Emitop;
		Ofield(rmask);
		Store_Rs;
		dc++;
		width -= WBITS - doff;
	
		c = width >> LWBITS;
		if(c) {
			li = 0;
			if(c > 1) {
				Ilabel(c);
				li = p;
			}
			Orshb_RsRt;
			Loadzx_Rt_D(0);
			if(fd) {
				Fetch_Rd_D(0);
			} else {
				Nop;
			}
			Oorlsha_RsRt;
			sc += c;
			Emitop;
			if(fd) {
				Store_Rs;
			} else {
				Store_Rs_D;
			}
			dc += c;
			if(c > 1) {
				Iloop(li);
			}
		}
	
		width &= (WBITS-1);
		if(width) {
			Orshb_RsRt;
			if(width > sha) {
				Load_Rt_D(0);
				Fetch_Rd_D(0);
				Oorlsha_RsRt;
				sc++;
			} else {
				Fetch_Rd_D(1);
			}
			dc++;
			Emitop;
			Ofield(lmask);
			Store_Rs;
		}
		break;

	case Tbshg:
		/* backward, src offset greater than dest offset */
		Loadzx_Rt_D(0);
		Fetch_Rd_D(0);
		Orsha_RsRt;
		Loadzx_Rt_D(1);
		Oorlshb_RsRt;
		sc += 2;
		Emitop;
		Ofield(rmask);
		Store_Rs;
		dc++;
		width -= WBITS - doff;
	
		c = width >> LWBITS;
		if(c) {
			li = 0;
			if(c > 1) {
				Ilabel(c);
				li = p;
			}
			Orsha_RsRt;
			Loadzx_Rt_D(0);
			if(fd) {
				Fetch_Rd_D(0);
			} else {
				Nop;
			}
			Oorlshb_RsRt;
			sc += c;
			Emitop;
			if(fd) {
				Store_Rs;
			} else {
				Store_Rs_D;
			}
			dc += c;
			if(c > 1) {
				Iloop(li);
			}
		}
	
		width &= WBITS-1;
		if(width) {
			Orsha_RsRt;
			if(width > shb) {
				Loadzx_Rt_D(0);
				Fetch_Rd_D(0);
				sc++;
			} else {
				Fetch_Rd_D(1);
			}
			dc++;
			Oorlshb_RsRt;
			Emitop;
			Ofield(lmask);
			Store_Rs;
		}
		break;

	case Texp:
		/* expansion: dest ldepth > src ldepth */
		if(WBITS == 8) {
			b = 8 >> le;
			/* db == 8, dl == 0 */
		} else {
			b = (le <= 2) ? 8 : 32 / (1 << le);
			db = b << le;
			dl = (le <= 2) ? le : 2;
		}
		Inittab(tab,osiz);
	
		/*
		 * method:
		 * load the source a word at a time, into Rt;
		 * (if there is a shift, use Ru to hold next or last partial word,
		 *  or, if WBITS == 8, it is  <<8 in Rt)
		 * take b bits at a time from source and convert via table into Rd
		 * (each table lookup yields db bits, in 1<<dl bytes);
		 * assemble into Rs until have WBITS bits;
		 * fetch dest word into Rd, operate into Rs, store in dest
		 *
		 * this code needs reworking for expansion factor > 8
		 */

		if(WBITS == 8) {
			if(sh == 0) {
				Load_Rt_P;
				sfo = 24;
			} else if(sh > 0) {
				Load_Rt_P;
				Olsh_RtRt(8);
				if((doff+width)>>le  > shb) {
					Loador_Rt_P;
					sc++;
				}
				/* relevant source bits: Rt[16+sh..23+sh] */
				sfo = 16 + sh;
			} else {
				Load_Rt_P;
				/* relevant source bits: Rt[16+(8+sh)..23+(8+sh) */
				sfo = 24 + sh;
			}
			sf = 0;
			firstd = 1;
		} else {
			if(sh == 0) {
				Load_Rt_P;
			} else if(sh > 0) {
				Load_Rt_P;
				
				Olsha_RtRt;
				if((doff+width)>>le > shb) {
					Load_Ru_P;
					Oorrshb_RtRu;
					sc++;
				}
			} else {
				Load_Ru_P;
				Orsha_RtRu;
			}
			sf = (soff - sh) & ~(b-1);
			firstd = 1;
			firsts = 1;
		}
		sc++;
		while(sf < WBITS && width > 0) {
			if(WBITS == 8) {
				Table_RsRt(sf+sfo,b,0);
				sf += b;
			} else {
				if(firstd)
					df = (sf << le) & (WBITS-1);
				else
					df = 0;
				while(df < WBITS) {
					Table_RdRt(sf,b,dl);
					if(df==0 || firsts) {
						c = WBITS - (df + db);
						Olsh_RsRd(c);
					} else if(df == WBITS - db) {
						Oor_RsRd;
					} else {
						c = WBITS - (df + db);
						Oorlsh_RsRd(c);
					}
					sf += b;
					df += db;
					firsts = 0;
				}
			}
			Fetch_Rd(1);
			Emitop;
			if(firstd) {
				width -= WBITS - doff;
				if(width > 0 && lmask != WMASK) {
					Ofield(lmask);
				} else if(width <= 0) {
					lmask &= rmask;
					Ofield(lmask);
				}
			} else {
				width -= WBITS;
				if(width < 0) {
					Ofield(rmask);
				}
			}
			Store_Rs_P;
			dc++;
			firstd = 0;
		}
		if(width <= 0)
			break;
	
		c = width >> (LWBITS+le);
		if(c) {
			li = 0;
			if(c > 1) {
				Ilabel(c);
				li = p;
			}
			if(WBITS == 8) {
				Olsh_RtRt(8);
				Loador_Rt_P;
			} else {
				if(sh == 0) {
					Load_Rt_P;
				} else if(sh > 0) {
					Olsha_RtRu;
					Load_Ru_P;
					Oorrshb_RtRu;
				} else {
					Olshb_RtRu;
					Load_Ru_P;
					Oorrsha_RtRu;
				}
			}
			sc += c;
			for(sf = 0; sf < WBITS;) {
				if(WBITS == 8) {
					Table_RsRt(sf+sfo,b,0);
					sf += b;
				} else {
					for(df = 0; df < WBITS;) {
						Table_RdRt(sf,b,dl);
						Assemblex(df,db);
						sf += b;
						df += db;
					}
				}
				if(fd) {
					Fetch_Rd(1);
				}
				Emitop;
				Store_Rs_P;
				dc += c;
			}
			if(c > 1) {
				Iloop(li);
			}
		}
		width -= c << (LWBITS+le);
		if(width <= 0)
			break;

		if(WBITS == 8) {
			if(sh == 0) {
				Load_Rt_P;
				sc++;
			} else if(sh > 0) {
				Olsh_RtRt(8);
				if(width>>le  > shb) {
					Loador_Rt_P;
					sc++;
				}
			} else {
				Olsh_RtRt(8);
				if(width>>le > sha) {
					Loador_Rt_P;
					sc++;
				}
			}
		} else {
			if(sh == 0) {
				Load_Rt_P;
				sc++;
			} else if(sh > 0) {
				Olsha_RtRu;
				if(width>>le  > shb) {
					Load_Ru_P;
					Oorrshb_RtRu;
					sc++;
				}
			} else {
				Olshb_RtRu;
				if(width>>le > sha) {
					Load_Ru_P;
					Oorrsha_RtRu;
					sc++;
				}
			}
		}
		for(sf = 0; sf < WBITS && width > 0; ) {
			if(WBITS == 8) {
				Table_RsRt(sf+sfo,b,0);
				sf += b;
			} else {
				for(df = 0; df < WBITS;) {
					Table_RdRt(sf,b,dl);
					Assemblex(df,db);
					sf += b;
					df += db;
				}
			}
			Fetch_Rd(1);
			Emitop;
			width -= WBITS;
			if(width < 0) {
				Ofield(rmask);
			}
			Store_Rs_P;
			dc++;
		}
		break;

	case Tcon:
		/* contraction: dest ldepth < src ldepth */
		db = 8 >> -le;
		Inittab(tab,osiz);
	
		/*
		 * method:
		 * load the source a word at a time, into Rt;
		 * (if there is a shift, use Ru to hold next or last partial word,
		 * or, if WBITS==8, it is <<8 in Rt)
		 * take 8 bits at a time from source and convert via table into Rd
		 * (each table lookup yields db bits, in 1 byte);
		 * assemble into Rs until have WBITS bits (takes several src words);
		 * fetch dest word into Rd, operate into Rs, store in dest
		 *
		 * Something should be done to improve this code, but
		 * it isn't used much.
		 */

		if(sh < 0) {
			c = (-sh)/WBITS;
			sh += c*WBITS;
			if(WBITS == 8) sfo = 24 + sh;
		} else {
			c = 0;
			if(WBITS == 8) sfo = sh ? 16 + sh : 24;
		}
		firstd = 1;
		firsts = 1;
		for(df = c*db*(4/W2L); df < WBITS && df < doff + width; ) {
			c = (doff + width - df) << -le;
			/* c = number of source bits needed to fill rest */
			if(WBITS == 8) {
				if(sh == 0) {
					Load_Rt_P;
					sc++;
				} else if(sh > 0) {
					if(firsts) {
						Load_Rt_P;
						Olsh_RtRt(8);
						sc++;
					} else {
						Olsh_RtRt(8);
					}
					if(shb < c) {
						Loador_Rt_P;
						sc++;
					}
				} else {
					if(firsts) {
						Load_Rt_P;
						sc++;
					} else {
						Olsh_RtRt(8);
						if(sha < c) {
							Loador_Rt_P;
							sc++;
						}
					}
				}
			} else {
				if(sh == 0) {
					Load_Rt_P;
					sc++;
				} else if(sh > 0) {
					if(firsts) {
						Load_Rt_P;
						Olsha_RtRt;
						sc++;
					} else {
						Olsha_RtRu;
					}
					if(shb < c) {
						Load_Ru_P;
						sc++;
						Oorrshb_RtRu;
					}
				} else {
					if(!firsts) {
						Olshb_RtRu;
					}
					if(sha < c) {
						Load_Ru_P;
						sc++;
						if(firsts) {
							Orsha_RtRu;
						} else {
							Oorrsha_RtRu;
						}
					}
				}
			}
			firsts = 0;
			if(WBITS == 8) {
				Table_RdRt(sfo,8,0);
				if(firstd) {
					Olsh_RsRd(8-(df+db));
				} else {
					Assemble(df,db);
				}
				df += db;
				firstd = 0;
			} else {
				for(sf = 0; sf < WBITS; ) {
					Table_RdRt(sf,8,0);
					if(firstd) {
						c = WBITS-(df+db);
						Olsh_RsRd(c);
					} else {
						Assemble(df,db);
					}
					df += db;
					sf += 8;
					firstd = 0;
				}
			}
		}
		Fetch_Rd(1);
		Emitop;
		width -= WBITS - doff;
		if(width > 0 && lmask != WMASK) {
				Ofield(lmask);
		} else if(width <= 0) {
			lmask &= rmask;
			Ofield(lmask);
		}
		Store_Rs_P;
		dc++;
		if(width <= 0)
			break;
	
		c = width >> LWBITS;
		if(c) {
			li = 0;
			if(c > 1) {
				Ilabel(c);
				li = p;
			}
			for(df = 0; df < WBITS; ) {
				if(WBITS == 8) {
					if(sh == 0) {
						Load_Rt_P;
					} else {
						Olsh_RtRt(8);
						Loador_Rt_P;
					}
					sc += c;
					Table_RdRt(sfo,8,0);
					Assemble(df,db);
					df += db;
				} else {
					if(sh == 0) {
						Load_Rt_P;
					} else if(sh > 0) {
						Olsha_RtRu;
						Load_Ru_P;
						Oorrshb_RtRu;
					} else {
						Olshb_RtRu;
						Load_Ru_P;
						Oorrsha_RtRu;
					}
					sc += c;
					for(sf = 0; sf < WBITS; ) {
						Table_RdRt(sf,8,0);
						Assemblex(df,db);
						df += db;
						sf += 8;
					}
				}
			}
			if(fd) {
				Fetch_Rd(1);
			}
			Emitop;
			Store_Rs_P;
			dc += c;
			if(c > 1) {
				Iloop(li);
			}
		}
	
		width -= c << LWBITS;
		if(width <= 0)
			break;
	
		for(df = 0; df < width; ) {
			c = (width - df) << -le;
			if(WBITS == 8) {
				if(sh == 0) {
					Load_Rt_P;
					sc++;
				} else if(sh > 0) {
					Olsh_RtRt(8);
					if(shb < c) {
						sc++;
						Loador_Rt_P;
					}
				} else {
					Olsh_RtRt(8);
					if(sha < c) {
						Loador_Rt_P;
						sc++;
					}
				}
				Table_RdRt(sfo,8,0);
				Assemble(df,db);
				df += db;
			} else {
				if(sh == 0) {
					Load_Rt_P;
					sc++;
				} else if(sh > 0) {
					Olsha_RtRu;
					if(shb < c) {
						Load_Ru_P;
						Oorrshb_RtRu;
						sc++;
					}
				} else {
					Olshb_RtRu;
					if(sha < c) {
						Load_Ru_P;
						Oorrsha_RtRu;
						sc++;
					}
				}
				/* walk the whole source word, as the two loops above do */
				for(sf = 0; sf < WBITS; ) {
					Table_RdRt(sf,8,0);
					Assemble(df,db);
					df += db;
					sf += 8;
				}
			}
		}
		Fetch_Rd(1);
		Emitop;
		Ofield(rmask);
		Store_Rs_P;
		dc++;
		break;

	}

	/* finish outer loop, put in rts, and execute */

	if(height > 0) {
		/*
		 * sc and dc count the words already stepped over within a
		 * row; add what is left of the span to reach the next row
		 */
		if(backward)
			c = (dc - dspan) * (WBITS/8);
		else
			c = (dspan - dc) * (WBITS/8);
		if(c) {
			Add_Ad(c);
		}
		if(fs) {
			if(backward)
				c = (sc - sspan) * (WBITS/8);
			else
				c = (sspan - sc) * (WBITS/8);
			if(c) {
				Add_As(c);
			}
		}
		Oloop(lo);
	}
	Orts;
#ifdef TEST
	if(onstack && p - memstart > Progmaxnoconv)
		print("Increase Progmaxnoconv to at least %d!\n", p - memstart);
	else if(p - memstart > Progmax)
		print("Increase Progmax to at least %d!\n", p - memstart);
	mem = memstart;
#endif
	bbexec((void*)memstart, (p-memstart)*sizeof(Type), onstack);
}

#ifdef TEST
/* test harness state, shared by run and main */
void	prprog(void);		/* defined elsewhere; called when a test fails */
GBitmap *bb1, *bb2;		/* dst and src bitmaps for the rectangle tests */
ulong	*src, *dst, *xdst, *xans;	/* src = bb2->base, dst = bb1->base; xdst = initial dst contents, xans = expected result */
int	swds, dwds;		/* number of words in src and in dst */
long	ticks;			/* *_clock difference accumulated over timed calls */
int	timeit;			/* set by -t: time gbitblt instead of checking it */

#ifdef BYTEREV
/*
 * byterev - move the low byte of v up by 24 bits and the next
 * byte up by 8, and bring the bytes 8 and 24 bits up back down,
 * which reverses the byte order of a 32-bit value.
 */
ulong
byterev(ulong v)
{
	ulong r;

	r = v << 24;
	r |= (v << 8) & 0x00FF0000;
	r |= (v >> 8) & 0x0000FF00;
	r |= v >> 24;
	return r;
}
#endif
/*
 * NOTE(review): run() declares _clock as `extern long *_clock',
 * which does not match this definition's type - confirm what the
 * T386 build is meant to do here.
 */
#ifdef T386
long _clock;
#endif

/*
 * func - reference implementation of one bitblt opcode on a
 * single pixel.
 *
 * f is the opcode, s the source pixel (1<<sld bits wide) and
 * d the destination pixel (1<<dld bits wide).  The source pixel
 * is first brought to the destination's width: replicated when
 * the destination is wider, reduced to its high-order bits when
 * it is narrower.  The result is masked to the destination
 * pixel's width.  An opcode that matches no case leaves d as is.
 */
long
func(int f, long s, int sld, long d, int dld)
{
	long rep;
	int dbits, sbits, n;

	dbits = 1 << dld;
	sbits = 1 << sld;

	if(dbits > sbits) {
		/* replicate the source pixel until it fills a dest pixel */
		rep = s;
		n = sbits;
		while(n < dbits) {
			rep <<= sbits;
			s |= rep;
			n += sbits;
		}
	} else if(dbits < sbits) {
		/* keep only the high-order bits of the source pixel */
		s >>= sbits - dbits;
	}

	switch(f){
	case Zero:
		d = 0;
		break;
	case DnorS:
		d = ~(d|s);
		break;
	case DandnotS:
		d = d & ~s;
		break;
	case notS:
		d = ~s;
		break;
	case notDandS:
		d = ~d & s;
		break;
	case notD:
		d = ~d;
		break;
	case DxorS:
		d = d ^ s;
		break;
	case DnandS:
		d = ~(d&s);
		break;
	case DandS:
		d = d & s;
		break;
	case DxnorS:
		d = ~(d^s);
		break;
	case S:
		d = s;
		break;
	case DornotS:
		d = d | ~s;
		break;
	case D:
		/* destination unchanged */
		break;
	case notDorS:
		d = ~d | s;
		break;
	case DorS:
		d = d | s;
		break;
	case F:
		d = ~0;
		break;
	}

	return d & ((1<<dbits)-1);
}

void
run(int fr, int to, int w, int op)
{
	int i, j, f, t, fy, ty;
	extern long *_clock;

	fr += bb2->r.min.x;
	to += bb1->r.min.x;
	fy = bb2->r.min.y + 1;
	ty = bb1->r.min.y + 1;
	if(timeit) {
		memcpy(dst, xdst, dwds * sizeof(long));
		ticks -= *_clock;
		gbitblt(bb1, Pt(to,ty), bb2, Rect(fr,fy,fr+w,fy+2), op);
		ticks += *_clock;
		return;
	}
	f = fr;
	t = to;
	memcpy(dst, xdst, dwds * sizeof(long));
	for(i=0; i<w; i++) {
		gbitblt(bb1, Pt(t,ty), bb2, Rect(f,fy,f+1,fy+1), op);
		gbitblt(bb1, Pt(t,ty+1), bb2, Rect(f,fy+1,f+1,fy+2), op);
		f++;
		t++;
	}
	memcpy(xans, dst, dwds * sizeof(long));

	memcpy(dst, xdst, dwds * sizeof(long));
	gbitblt(bb1, Pt(to,ty), bb2, Rect(fr,fy,fr+w,fy+2), op);

	if(memcmp(xans, dst, dwds * sizeof(long))) {
		/*
		 * print src and dst row offset, width in bits, and forw/back
		 * then print for each of the four rows: the source (s),
		 * the dest (d), the good value of the answer (g),
		 * and the actual bad value of the answer (b)
		 */
		print("fr=%d to=%d w=%d fb=%d%d\n",
			fr, to, w, FORCEFORW, FORCEBAKW);
		print("dst bitmap b %#lux, z %d, w %d, ld %d, r [%d,%d][%d,%d]\n",
			bb1->base, bb1->zero, bb1->width, bb1->ldepth,
			bb1->r.min.x, bb1->r.min.y, bb1->r.max.x, bb1->r.max.y);
		print("src bitmap b %#lux, z %d, w %d, ld %d, r [%d,%d][%d,%d]\n",
			bb2->base, bb2->zero, bb2->width, bb2->ldepth,
			bb2->r.min.x, bb2->r.min.y, bb2->r.max.x, bb2->r.max.y);
		for(j=0; 7*j < dwds; j++) {
			print("\ns");
			for(i=0; i<7 && 7*j+i < dwds; i++)
				print(" %.8lux", src[7*j + i]);
			print("\nd");
			for(i=0; i<7 && 7*j+i < dwds; i++)
				print(" %.8lux", xdst[7*j + i]);
			print("\ng");
			for(i=0; i<7 && 7*j+i < dwds; i++)
				print(" %.8lux", xans[7*j + i]);
			print("\nb");
			for(i=0; i<7 && 7*j+i < dwds; i++)
				print(" %.8lux", dst[7*j + i]);
			print("\n");
		}
		prprog();
	}
}

/*
 * main - bitblt tester.
 *
 * usage: [-s] [-i iters] [-t [iters]] [sld [dld]]
 *
 * sld and dld are the source and destination ldepths (0 by
 * default).  -s uses bitmaps with origin (0,0) and does not
 * reseed the random number generator; -i sets the number of
 * rectangle trials per opcode; -t times gbitblt instead of
 * checking its results.
 *
 * For each opcode, unless timing, 1000 single-pixel bitblts are
 * checked against func, alternating forced-forward and
 * forced-backward; then iters random rectangles are run forward
 * and, when the ldepths are equal, iters more are run backward.
 */
void
main(int argc, char *argv[])
{
	int f, t, w, i, sld, dld, op, iters, simple;
	ulong s, d, spix, dpix, apix, fpix, m, *ps, *pd;
	Point sorg, dorg;
	GBitmap *bs, *bd;
	long seed;
	char *ct;

	sld = 0;
	dld = 0;
	timeit = 0;
	iters = 200;
	simple = 0;
	ARGBEGIN {
	case 'i':
		/* keep the default when the count is missing, as -t does */
		ct = ARGF();
		if(ct)
			iters = atoi(ct);
		break;
	case 's':
		simple = 1;
		break;
	case 't':
		timeit = 1;
		ct = ARGF();
		if(ct)
			iters = atoi(ct);
		break;
	} ARGEND
	if(argc > 0)
		sld = atoi(argv[0]);
	if(argc > 1)
		dld = atoi(argv[1]);
	if(sld < 0 || sld > 3 || dld < 0 || dld > 3 ||
           (sld != dld && !tabs[sld][dld])){
		print("conversion from ldepth %d to %d not enabled\n",
			sld, dld);
		exits(0);
	}
	if(!timeit && !simple) {
		seed = time(0);
		print("seed %lux\n", seed); srand(seed);	/**/
	}

	print("sld %d dld %d\n", sld, dld);
	/*
	 * NOTE(review): testing starts at opcode 1; the inline
	 * comment suggests Zero was meant at some point - confirm.
	 */
	op = 1/*Zero*/;

	/* bitmaps for 1-bit tests */
	bd = gballoc(Rect(0,0,32,1), dld);
	bs = gballoc(Rect(0,0,32,1), sld);
	for(i=0; i<bs->width; i++)
		bs->base[i] = lrand();

	/* bitmaps for rect tests */
	if(simple) {
		dorg = Pt(0,0);
		sorg = Pt(0,0);
	} else {
		dorg = Pt(nrand(63)-31,nrand(63)-31);
		sorg = Pt(nrand(63)-31,nrand(63)-31);
	}
	bb1 = gballoc(Rpt(dorg,add(dorg,Pt(200,4))), dld);
	bb2 = gballoc(Rpt(sorg,add(sorg,Pt(200,4))), sld);
	dwds = bb1->width * Dy(bb1->r);
	swds = bb2->width * Dy(bb2->r);
	dst = bb1->base;
	src = bb2->base;
	xdst = malloc(dwds * sizeof(long));
	xans =  malloc(dwds * sizeof(long));
	if(xdst == 0 || xans == 0) {
		print("out of memory\n");
		exits("malloc");
	}
	for(i=0; i<swds; i++)
		src[i] = lrand();
	for(i=0; i<dwds; i++)
		xdst[i] = lrand();
loop:
	print("Op %d\n", op);
	if(!timeit) {
		print("one pixel\n");
		ps = bs->base;
		pd = bd->base;
		FORCEFORW = 1;
		FORCEBAKW = 0;
		for(i=0; i<1000; i++, FORCEFORW = !FORCEFORW, FORCEBAKW = !FORCEBAKW) {
			f = nrand(32 >> sld);
			t = nrand(32 >> dld);
			s = lrand();
			d = lrand();
			ps[0] = s;
			pd[0] = d;
#ifdef BYTEREV
			spix = scrpix(byterev(s),f,sld);
			dpix = scrpix(byterev(d),t,dld);
#else
			spix = scrpix(s,f,sld);
			dpix = scrpix(d,t,dld);
#endif
			apix = func(op, spix, sld, dpix, dld);
			gbitblt(bd, Pt(t,0), bs, Rect(f,0,f+1,1), op);
			/* the source word must not have been changed */
			if(ps[0] != s) {
				print("bb src %.8lux %.8lux %d %d\n", ps[0], s, f, t);
				exits("error");
			}
			m = scrmask(t,dld);
#ifdef BYTEREV
			m = byterev(m);
#endif
			/* destination bits outside pixel t must not have been changed */
			if((pd[0] & ~m) != (d & ~m)) {
					print("bb dst1 %.8lux %.8lux\n",
						s, d);
					print("bb      %.8lux %.8lux %d %d\n",
						ps[0], pd[0], f, t);
					prprog();
					exits("error");
			}
#ifdef BYTEREV
			fpix = scrpix(byterev(pd[0]),t,dld);
#else
			fpix = scrpix(pd[0],t,dld);
#endif
			/* pixel t must hold the value func computed */
			if(apix != fpix) {
				print("bb dst2 %.8lux %.8lux\n",
					s, d);
				print("bb      %.8lux %.8lux %d %d\n",
					ps[0], pd[0], f, t);
				print("bb      %.8lux %.8lux %.8lux %.8lux\n",
					spix, dpix, apix, fpix);
				prprog();
				exits("error");
			}
		}
	}

	print("for\n");
	FORCEFORW = 1;
	FORCEBAKW = 0;

	for(i=0; i<iters; i++) {
		f = nrand(64);
		t = nrand(64);
		w = nrand(130);
		run(f, t, w, op);
	}

	if(sld == dld) {
		print("bak\n");
		FORCEFORW = 0;
		FORCEBAKW = 1;
	
		for(i=0; i<iters; i++) {
			f = nrand(64);
			t = nrand(64);
			w = nrand(130);
			run(f, t, w, op);
		}
	}

	if(op < F) {
		op++;
		goto loop;
	}
	if(timeit)
		print("time: %ld ticks\n", ticks);
	exits(0);
}


#endif

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.