File:  [Plan 9 NeXT] / lucent / sys / src / libgnot / bbv.h
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs
Tue Apr 24 18:01:05 2018 UTC (8 years, 1 month ago) by root
Branches: lucent, MAIN
CVS tags: plan9, HEAD
Plan 9 NeXT

typedef	long	Type;

/*
 * See the comments at the beginning of gbitblt.c
 * for an outline of how this bitblt works
 
 * Registers
 *   in addition to the registers of the abstract machine,
 *   we use RF to hold ~0 always, RX as a scratch register,
 *   and AT to hold the address of the table given in Inittab.
 */
#define As	5
#define Ad	6
#define	Rs	7
#define	Rd	8
#define Rt	9
#define Ru	10
#define Ri	11
#define Ro	12
#define RF	1	/* ~0 */
#define RX	2	/* scratch */
#define AT	3	/* conversion table */

/*
 * Macros for assembling MIPS instructions
 */

/* generate `a', `size' bits wide, into bit position `shift' */
/* the SMM version is used when there might be extra bits in `a' */
#define	SM(a,size,shift)	((a)<<(shift))
#define	SMM(a,size,shift)	(((a)&(1<<(size))-1)<<(shift))

/* Make sure im fits in 16 bits and sh fits in 5 bits */
#define Iinst(op,rs,rt,im)	(SM(op,6,26)|SM(rs,5,21)|SM(rt,5,16)|(im))
#define Rinst(op,rs,rt,rd,sh,f)	(SM(op,6,26)|SM(rs,5,21)|SM(rt,5,16)|SM(rd,5,11)|SM(sh,5,6)|(f))

/*
 * Instructions
 */

#define SLL(rs,rd,sh)	Rinst(0,0,rs,rd,sh,0)
#define SRL(rs,rd,sh)	Rinst(0,0,rs,rd,sh,2)
#define ADDU(rs,rt,rd)	Rinst(0,rs,rt,rd,0,041)
#define ADDIU(rs,rd,v)	Iinst(011,rs,rd,v)
#define AND(rs,rt,rd)	Rinst(0,rs,rt,rd,0,044)
#define ANDI(rs,rd,v)	Iinst(014,rs,rd,v)
#define OR(rs,rt,rd)	Rinst(0,rs,rt,rd,0,045)
#define ORI(rs,rd,v)	Iinst(015,rs,rd,v)
#define XOR(rs,rt,rd)	Rinst(0,rs,rt,rd,0,046)
#define XORI(rs,rd,v)	Iinst(016,rs,rd,v)
#define NOR(rs,rt,rd)	Rinst(0,rs,rt,rd,0,047)
#define NOP		Rinst(0,0,0,0,0,047)
#define LUI(r,v)	Iinst(017,0,r,v)
#define LW(as,rd)	Iinst(043,as,rd,0)
#define LBU(as,rd)	Iinst(044,as,rd,0)
#define LHU(as,rd)	Iinst(045,as,rd,0)
#define SW(ad,rs)	Iinst(053,ad,rs,0)
#define BGTZ(r,d)	Iinst(007,r,0,d)
#define JR(r)		Rinst(0,r,0,0,0,010)

/*
 * Macros for assembling the operations of the abstract machine.
 * Each assumes that ulong *p points to the next location where
 * an instruction should be assembled.
 * These macros can use RX as a scratch register, but no others.
 * They can assume RF holds ~0.
 */

#define Ofield(c)	*p++ = XOR(Rs,Rd,Rs);				\
			if((c)&0xFFFF0000) {				\
				*p++ =  LUI(RX,((ulong)(c))>>16);	\
				*p++ = ORI(RX,RX,(c)&0xFFFF);		\
				*p++ = AND(RX,Rs,Rs);			\
			} else						\
				*p++ = ANDI(Rs,Rs,(c));			\
			*p++ = XOR(Rs,Rd,Rs)

#define Olsha_RsRt	*p++ = SLL(Rt,Rs,sha)
#define Olshb_RsRt	*p++ = SLL(Rt,Rs,shb)
#define Olsh_RsRd(c)	*p++ = SLL(Rd,Rs,c)
#define Olsh_RtRt(c)	*p++ = SLL(Rt,Rt,c)
#define Olsha_RtRt	*p++ = SLL(Rt,Rt,sha)
#define Olsha_RtRu	*p++ = SLL(Ru,Rt,sha)
#define Olshb_RtRu	*p++ = SLL(Ru,Rt,shb)
#define Orsha_RsRt	*p++ = SRL(Rt,Rs,sha)
#define Orshb_RsRt	*p++ = SRL(Rt,Rs,shb)
#define Orsha_RtRu	*p++ = SRL(Ru,Rt,sha)
#define Orshb_RtRu	*p++ = SRL(Ru,Rt,shb)
#define Oorlsha_RsRt	*p++ = SLL(Rt,RX,sha); *p++ = OR(RX,Rs,Rs)
#define Oorlshb_RsRt	*p++ = SLL(Rt,RX,shb); *p++ = OR(RX,Rs,Rs)
#define Oorlsh_RsRd(c)	*p++ = SLL(Rd,RX,c); *p++ = OR(RX,Rs,Rs)
#define Oorrsha_RsRt	*p++ = SRL(Rt,RX,sha); *p++ = OR(RX,Rs,Rs)
#define Oorrshb_RsRt	*p++ = SRL(Rt,RX,shb); *p++ = OR(RX,Rs,Rs)
#define Oorrsha_RtRu	*p++ = SRL(Ru,RX,sha); *p++ = OR(RX,Rt,Rt)
#define Oorrshb_RtRu	*p++ = SRL(Ru,RX,shb); *p++ = OR(RX,Rt,Rt)
#define Oor_RsRd	*p++ = OR(Rs,Rd,Rs)

/*
 * Try to use smaller instructions when the constant is small.
 * Note that MIPS sign extends immediate operands
 */
#define Add_As(c)	if((c)&0xFFFF8000) { \
				if(!((c)&0xFFFF0000)) { \
					*p++ = ORI(0,RX,(c)&0xFFFF); \
					*p++ = ADDU(RX,As,As); \
				} else { \
					*p++ = LUI(RX,((ulong)(c))>>16); \
					*p++ = ORI(RX,RX,(c)&0xFFFF); \
					*p++ = ADDU(RX,As,As); \
				} \
			} else \
				*p++ = ADDIU(As,As,c)

#define Add_Ad(c)	if((c)&0xFFFF8000) { \
				if(!((c)&0xFFFF0000)) { \
					*p++ = ORI(0,RX,(c)&0xFFFF); \
					*p++ = ADDU(RX,Ad,Ad); \
				} else { \
					*p++ = LUI(RX,((ulong)(c))>>16); \
					*p++ = ORI(RX,RX,(c)&0xFFFF); \
					*p++ = ADDU(RX,Ad,Ad); \
				} \
			} else \
				*p++ = ADDIU(Ad,Ad,c)

#define Initsd(s,d)	*p++ = LUI(As,((ulong)(s))>>16); \
			*p++ = ORI(As,As,((ulong)(s))&0xFFFF); \
			*p++ = LUI(Ad,((ulong)(d))>>16); \
			*p++ = ORI(Ad,Ad,((ulong)(d))&0xFFFF)

#define Initsh(a,b)

/* Put all ones in RF */
#define Extrainit	*p++ = ADDIU(0,RF,0xFFFF)

/*
 * We put one less than the loop count into R[io], so that
 * the loop instructions can decrement after the test instead
 * of before
 */
#define Ilabel(c)	tmp = (c)-1; \
			if(tmp&0xFFFF8000) { \
				*p++ = LUI(Ri,((ulong)tmp)>>16); \
				*p++ = ORI(Ri,Ri,tmp&0xFFFF); \
			} else \
				*p++ = ADDIU(0,Ri,tmp)

#define Olabel(c)	tmp = (c)-1; \
			if(tmp&0xFFFF8000) { \
				*p++ = LUI(Ro,((ulong)tmp)>>16); \
				*p++ = ORI(Ro,Ro,tmp&0xFFFF); \
			} else \
				*p++ = ADDIU(0,Ro,tmp)

/*
 * The decrement is done after the test instead of before
 * so that the required delay slot can be filled with
 * the decrement (counts were adjusted by [IO]label)
 */
#define Iloop(lp)	*p = BGTZ(Ri,(lp-(p+1))&0xFFFF); p++; \
			*p++ = ADDIU(Ri,Ri,0xFFFF)

#define Oloop(lp)	*p = BGTZ(Ro,(lp-(p+1))&0xFFFF); p++; \
			*p++ = ADDIU(Ro,Ro,0xFFFF)

#define Orts		*p++ = JR(31); *p++ = NOP

/*
 * Load and Fetch macros: arg should be 1 if following instr might use loaded value
 * In the predecrement versions, it's as easy to do the decrment afterwards in
 * the delay slot
 */

#define Load_Rs_P	*p++ = LW(As,Rs); *p++ = ADDIU(As,As,4)
#define Load_Rt_P	*p++ = LW(As,Rt); *p++ = ADDIU(As,As,4)
#define Loadzx_Rt_P	*p++ = LW(As,Rt); *p++ = ADDIU(As,As,4)
#define Loador_Rt_P	*p++ = LW(As,Rt); *p++ = ADDIU(As,As,4)
#define Load_Ru_P	*p++ = LW(As,Ru); *p++ = ADDIU(As,As,4)
#define Load_Rd_D(f)	*p++ = ADDIU(As,As,-4&0xFFFF); *p++ = LW(As,Rd); \
			if(f) *p++ = NOP
#define Load_Rs_D(f)	*p++ = ADDIU(As,As,-4&0xFFFF); *p++ = LW(As,Rs); \
			if(f) *p++ = NOP
#define Load_Rt_D(f)	*p++ = ADDIU(As,As,-4&0xFFFF); *p++ = LW(As,Rt); \
			if(f) *p++ = NOP
#define Loadzx_Rt_D(f)	*p++ = ADDIU(As,As,-4&0xFFFF); *p++ = LW(As,Rt); \
			if(f) *p++ = NOP
#define Load_Rd(f)	*p++ = LW(As,Rd); if(f) *p++ = NOP
#define Load_Rs(f)	*p++ = LW(As,Rs); if(f) *p++ = NOP
#define Load_Rt(f)	*p++ = LW(As,Rt); if(f) *p++ = NOP
#define Loadzx_Rt(f)	*p++ = LW(As,Rt); if(f) *p++ = NOP
#define Fetch_Rd_P(f)	*p++ = LW(Ad,Rd); *p++ = ADDIU(Ad,Ad,4)
#define Fetch_Rd_D(f)	*p++ = ADDIU(Ad,Ad,-4&0xFFFF); *p++ = LW(Ad,Rd); \
			if(f) *p++ = NOP
#define Fetch_Rd(f)	*p++ = LW(Ad,Rd); if(f) *p++ = NOP
#define Store_Rs_P	*p++ = SW(Ad,Rs); *p++ = ADDIU(Ad,Ad,4)
#define Store_Rs_D	*p++ = ADDIU(Ad,Ad,-4&0xFFFF); *p++ = SW(Ad,Rs)
#define Store_Rs	*p++ = SW(Ad,Rs)
#define Nop		*p++ = NOP

#define Inittab(t,s)	*p++ = LUI(AT,((ulong)(t))>>16); \
			*p++ = ORI(AT,AT,((ulong)(t))&0xFFFF)

/* emit code to look up n bits at offset o; table entries are 1<<l bytes long */
#define Table_RdRt(o,n,l)			\
	tmp = 32-((o)+(n))-(l);			\
	if(tmp > 0)				\
		*p++ = SRL(Rt,Rd,tmp);		\
	else if((l) > 0)			\
		*p++ = SLL(Rt,Rd,l);		\
	else					\
		*p++ = ADDU(Rt,0,Rd);		\
	*p++ = ANDI(Rd,Rd,((1<<(n))-1)<<(l));	\
	*p++ = ADDU(Rd,AT,Rd);			\
	if(osiz==1) *p++ = LBU(Rd,Rd);		\
	else if(osiz==2) *p++ = LHU(Rd,Rd);	\
	else *p++ = LW(Rd,Rd);			\
	*p++ = NOP

#define Table_RsRt(o,n,l)			\
	tmp = 32-((o)+(n))-(l);			\
	if(tmp > 0)				\
		*p++ = SRL(Rt,Rs,tmp);		\
	else if((l) > 0)			\
		*p++ = SLL(Rt,Rs,l);		\
	else					\
		*p++ = ADDU(Rt,0,Rs);		\
	*p++ = ANDI(Rd,Rs,((1<<(n))-1)<<(l));	\
	*p++ = ADDU(Rs,AT,Rs);			\
	if(osiz==1) *p++ = LBU(Rs,Rs);		\
	else if(osiz==2) *p++ = LHU(Rs,Rs);	\
	else *p++ = LW(Rs,Rs);			\
	*p++ = NOP

/* emit code to assemble low n bits of Rd into offset o in Rs */
#define Assemble(o,n)				\
	if((o) == 0) {				\
		Olsh_RsRd(32-(n));		\
	} else if((o) == 32-(n)) {		\
		*p++ = OR(Rs,Rd,Rs);		\
	} else {				\
		Oorlsh_RsRd(32-((o)+(n)));	\
	}

/* emit code to assemble low n bits of Rd into offset o in Rs.
   this works by shifting Rd as we go, it only works if
   the whole word will eventually be filled */
#define Assemblex(o,n)				\
	if((o) == 0) {				\
		*p++ = ADDU(Rd,0,Rs);		\
	} else {				\
		*p++ = SLL(Rs,Rs,n);		\
		*p++ = OR(Rd,Rs,Rs);		\
	}

#define Emitop			\
	p[0] = fi[0];		\
	p[1] = fi[1];		\
	p = (Type*)(((char *)p)+fin)

typedef struct	Fstr
{
	char	fetchs;
	char	fetchd;
	short	n;
	Type	instr[2];
} Fstr;

Fstr	fstr[16] =
{
[0]	0,0,4,		/* Zero */
	{ LUI(Rs,0), 0 },

[1]	1,1,4,		/* DnorS */
	{ NOR(Rs,Rd,Rs), 0 },

[2]	1,1,8,		/* DandnotS */
	{ XOR(Rs,RF,Rs), AND(Rs,Rd,Rs) },

[3]	1,0,4,		/* notS */
	{ XOR(Rs,RF,Rs), 0 },

[4]	1,1,8,		/* notDandS */
	{ XOR(Rs,RF,Rs), NOR(Rd,Rs,Rs) },

[5]	0,1,4,		/* notD */
	{ XOR(Rd,RF,Rs), 0 },

[6]	1,1,4,		/* DxorS */
	{ XOR(Rd,Rs,Rs), 0 },

[7]	1,1,8,		/* DnandS */
	{ AND(Rd,Rs,Rs), XOR(Rs,RF,Rs) },

[8]	1,1,4,		/* DandS */
	{ AND(Rd,Rs,Rs), 0 },

[9]	1,1,8,		/* DxnorS */
	{ XOR(Rd,Rs,Rs), XOR(Rs,RF,Rs) },

[10]	0,1,4,		/* D */
	{ ADDU(Rd,0,Rs), 0 },

[11]	1,1,8,		/* DornotS */
	{ XOR(Rs,RF,Rs), OR(Rs,Rd,Rs) },

[12]	1,0,0,		/* S */
	{0, 0},

[13]	1,1,8,		/* notDorS */
	{ XOR(Rd,RF,RX), OR(Rs,RX,Rs) },

[14]	1,1,4,		/* DorS */
	{ OR(Rs,Rd,Rs), 0 },

[15]	0,0,4,		/* F */
	{ OR(RF,0,Rs), 0 },
};

#include "tabs.h"
static uchar *tabs[4][4] =
{
	{	     0, (uchar*)tab01, (uchar*)tab02, (uchar*)tab03},
	{(uchar*)tab10,		    0, (uchar*)tab12, (uchar*)tab13},
	{(uchar*)tab20, (uchar*)tab21,		   0, (uchar*)tab23},
	{(uchar*)tab30,	(uchar*)tab31, (uchar*)tab32,		  0},
};

static uchar tabosiz[4][4] = /* size in bytes of entries */
{
	{ 0, 2, 4, 4},
	{ 1, 0, 2, 4},
	{ 1, 1, 0, 2},
	{ 1, 1, 1, 0},
};

enum {
	Progmax = 900,		/* max number of Type units in a bitblt prog */
	Progmaxnoconv = 64,	/* max number of Type units when no conversion */
};

void
prprog(void)
{
	abort();	/* use db */
}

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.