File:  [Plan 9 NeXT] / lucent / sys / src / libgnot / bbk.h
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs
Tue Apr 24 18:01:05 2018 UTC (8 years, 1 month ago) by root
Branches: lucent, MAIN
CVS tags: plan9, HEAD
Plan 9 NeXT

/* the word in which generated code is held: Fstr.instr, Emitop and Execandfree all work in units of Type */
typedef	long	Type;

/*
 * See the comments at the beginning of bb.c and bbc.h
 * for an outline of how this bitblt works.
 *
 * Register numbers, in numeric order.
 *   R0 is relied on to hold 0.
 *   RF, RX, Rc and AT are extra to the abstract machine:
 *   RF holds ~0 always, RX is a scratch register, Rc is a
 *   scratch register for forming constants, and AT holds
 *   the address of the table given in Inittab.
 *   The remaining names are the registers of the abstract machine.
 */
#define	R0	0
#define	RF	16	/* ~0 */
#define	RX	17	/* scratch */
#define	AT	18	/* conversion table */
#define	As	19
#define	Ad	20
#define	Rs	21
#define	Rd	22
#define	Rt	23
#define	Ru	24
#define	Ri	25
#define	Ro	26
#define	Rc	27	/* const temporary */

/*
 * Instruction field values, grouped by the encoding macro
 * they are handed to and listed in numeric order.
 */

/* op2 argument of Inst2a */
#define	SETHI	0x04

/* cond argument of Inst2b */
#define	BG	0x0A

/* op3 argument of the Inst3 forms when op is 2 */
#define	ADD	0x00
#define	AND	0x01
#define	OR	0x02
#define	XOR	0x03
#define	SUB	0x04
#define	ANDN	0x05
#define	ORN	0x06
#define	SUBcc	0x14
#define	SLL	0x25
#define	SRL	0x26
#define	JMPL	0x38

/* op3 argument of the Inst3 forms when op is 3 */
#define	LD	0x00
#define	LDUB	0x01
#define	LDUH	0x02
#define	ST	0x04

/*
 * SM places `a' at bit position `shift' of an instruction word.
 * `size' is the width of the field in bits; it is there for the
 * reader only and is not used.  (To save execution time, the onus
 * is on the caller to make sure that `a' fits in `size' bits.)
 */
#define	SM(a,size,shift)	((a)<<(shift))

/* rd at bit 25, op2 at bit 22, immediate i in the low bits */
#define	Inst2a(rd,op2,i) \
	(SM(rd,5,25)|SM(op2,4,22)|SM(i,22,0))

/* as Inst2a, with `a' (bit 4) and cond (bits 0-3) packed into its rd field */
#define	Inst2b(a,cond,op2,d) \
	Inst2a(SM(a,1,4)|SM(cond,4,0), op2, d)

/* the part shared by the Inst3 forms: op, rd, op3 and rs1 */
#define	Inst3X(op,rd,op3,rs1) \
	(SM(op,2,30)|SM(rd,5,25)|SM(op3,6,19)|SM(rs1,5,14))

/* Inst3X plus i (bit 13), asi (from bit 5) and rs2 (low bits) */
#define	Inst3a(op,rd,op3,rs1,i,asi,rs2) \
	(Inst3X(op,rd,op3,rs1)|SM(i,1,13)|SM(asi,8,5)|SM(rs2,5,0))

/* Inst3X plus i (bit 13) and the immediate si (low bits) */
#define	Inst3b(op,rd,op3,rs1,i,si) \
	(Inst3X(op,rd,op3,rs1)|SM(i,1,13)|SM(si,13,0))

/* Inst3X plus opf (from bit 5) and rs2 (low bits) */
#define	Inst3c(op,rd,op3,rs1,opf,rs2) \
	(Inst3X(op,rd,op3,rs1)|SM(opf,9,5)|SM(rs2,5,0))

/*
 * OpConst emits code for dst = src op k.
 * When k, taken as a ulong, is at most 0xFFF it goes straight into
 * the instruction as an immediate; otherwise it is first built in
 * Rc with RConst and the register form of the instruction is used.
 * Expects ulong *p to point at the next free instruction slot.
 */
#define	OpConst(dst,src,op,k) \
	if(((ulong)(k)) <= 0xFFF) \
		*p++ = Inst3b(2,dst,op,src,1,(k)); \
	else { \
		RConst(Rc,(k)); \
		*p++ = Inst3a(2,dst,op,src,0,0,Rc); \
	}

/*
 * RConst emits the two instructions that load the (large) constant k
 * into reg: a SETHI carrying k>>10, then an OR that supplies the low
 * 10 bits.
 */
#define	RConst(reg,k) \
	*p++ = Inst2a(reg,SETHI,((ulong)(k))>>10); \
	*p++ = Inst3b(2,reg,OR,reg,1,(k)&0x3FF)


/*
 * Macros for assembling the operations of the abstract machine.
 * Each expects ulong *p to point at the next location where an
 * instruction should be assembled, and advances it.
 * RX is the only scratch register they are meant to use
 * (Ofield additionally forms its constant in Rc, through RConst).
 * They can assume RF holds ~0 and R0 holds 0.
 */

/* shared forms: rd = rs1 op cnt, and rd |= rs1 op cnt by way of RX */
#define Oshimm(rd,op,rs1,cnt)	*p++ = Inst3a(2,rd,op,rs1,1,0,cnt)
#define Oorshimm(rd,op,rs1,cnt)	*p++ = Inst3a(2,RX,op,rs1,1,0,cnt); \
				*p++ = Inst3a(2,rd,OR,rd,0,0,RX)

/* Rs = ((Rs ^ Rd) & mask) ^ Rd: Rs where mask has ones, Rd elsewhere */
#define Ofield(mask)	*p++ = Inst3a(2,Rs,XOR,Rs,0,0,Rd); \
			RConst(Rc,(mask)); \
			*p++ = Inst3a(2,Rs,AND,Rs,0,0,Rc); \
			*p++ = Inst3a(2,Rs,XOR,Rs,0,0,Rd)

/* left shifts; sha and shb are read from the scope of the caller */
#define Olsha_RsRt	Oshimm(Rs,SLL,Rt,sha)
#define Olshb_RsRt	Oshimm(Rs,SLL,Rt,shb)
#define Olsh_RsRd(c)	Oshimm(Rs,SLL,Rd,(c))
#define Olsh_RtRt(c)	Oshimm(Rt,SLL,Rt,(c))
#define Olsha_RtRt	Oshimm(Rt,SLL,Rt,sha)
#define Olsha_RtRu	Oshimm(Rt,SLL,Ru,sha)
#define Olshb_RtRu	Oshimm(Rt,SLL,Ru,shb)

/* right shifts */
#define Orsha_RsRt	Oshimm(Rs,SRL,Rt,sha)
#define Orshb_RsRt	Oshimm(Rs,SRL,Rt,shb)
#define Orsha_RtRu	Oshimm(Rt,SRL,Ru,sha)
#define Orshb_RtRu	Oshimm(Rt,SRL,Ru,shb)

/* shift into RX, then OR RX into the first-named register */
#define Oorlsha_RsRt	Oorshimm(Rs,SLL,Rt,sha)
#define Oorlshb_RsRt	Oorshimm(Rs,SLL,Rt,shb)
#define Oorlsh_RsRd(c)	Oorshimm(Rs,SLL,Rd,(c))
#define Oorrsha_RsRt	Oorshimm(Rs,SRL,Rt,sha)
#define Oorrshb_RsRt	Oorshimm(Rs,SRL,Rt,shb)
#define Oorrsha_RtRu	Oorshimm(Rt,SRL,Ru,sha)
#define Oorrshb_RtRu	Oorshimm(Rt,SRL,Ru,shb)

/* Rs = Rd | Rs */
#define Oor_RsRd	*p++ = Inst3a(2,Rs,OR,Rd,0,0,Rs)

/* add the constant n to the source (As) or destination (Ad) address register */
#define Add_As(n)	OpConst(As,As,ADD,(n))
#define Add_Ad(n)	OpConst(Ad,Ad,ADD,(n))

/* load the two address registers: As = src, Ad = dst */
#define Initsd(src,dst)	RConst(As,((ulong)(src))); \
			RConst(Ad,((ulong)(dst)))

/* nothing is emitted for the shift counts */
#define Initsh(a,b)

/* Put all ones in RF */
#define Extrainit	*p++ = Inst3b(2,RF,ORN,R0,1,0)

/* load the count registers that Iloop and Oloop count down */
#define Ilabel(cnt)	RConst(Ri,(cnt))
#define Olabel(cnt)	RConst(Ro,(cnt))

/*
 * Loop tail on count register r: subtract 1 from r with SUBcc,
 * branch on condition BG to lp, then a Nop after the branch.
 * The branch word is stored before p is advanced, so the
 * displacement (lp)-p is taken from the branch itself and is
 * counted in instruction words.
 */
#define Loopon(r,lp)	*p++ = Inst3b(2,r,SUBcc,r,1,1); \
			*p   = Inst2b(0,BG,0x2,((lp)-p)&0x3FFFFF); p++; \
			Nop

#define Iloop(lp)	Loopon(Ri,lp)
#define Oloop(lp)	Loopon(Ro,lp)

/* JMPL to register 15 plus 8, with the link written to R0; then a Nop */
#define Orts		*p++ = Inst3b(2,R0,JMPL,15,1,8); \
			Nop

/*
 * Loads, fetches and stores of one word.
 * Load_* read through As; Fetch_* and Store_* go through Ad.
 * The _P forms add 4 to the address register after the access;
 * the _D forms access offset -4 and then subtract 4.
 * In the predecrement versions, it's as easy to do the decrement
 * afterwards in the (virtual) delay slot, which will go faster on
 * some versions.
 * The zx and or variants emit the same LD as the plain ones here.
 * The argument f, where there is one, is not used; neither is the
 * s of Inittab.
 */

/* shared forms: r is the data register, a the address register */
#define Ldpost(r,a)	*p++ = Inst3b(3,r,LD,a,1,0); *p++ = Inst3b(2,a,ADD,a,1,4)
#define Ldpre(r,a)	*p++ = Inst3b(3,r,LD,a,1,(-4)&0x1FFF); *p++ = Inst3b(2,a,SUB,a,1,4)
#define Ldword(r,a)	*p++ = Inst3b(3,r,LD,a,1,0)
#define Stpost(r,a)	*p++ = Inst3b(3,r,ST,a,1,0); *p++ = Inst3b(2,a,ADD,a,1,4)
#define Stpre(r,a)	*p++ = Inst3b(3,r,ST,a,1,(-4)&0x1FFF); *p++ = Inst3b(2,a,SUB,a,1,4)
#define Stword(r,a)	*p++ = Inst3b(3,r,ST,a,1,0)

/* from the source, As */
#define Load_Rs_P	Ldpost(Rs,As)
#define Load_Rt_P	Ldpost(Rt,As)
#define Loadzx_Rt_P	Ldpost(Rt,As)
#define Loador_Rt_P	Ldpost(Rt,As)
#define Load_Ru_P	Ldpost(Ru,As)
#define Load_Rd_D(f)	Ldpre(Rd,As)
#define Load_Rs_D(f)	Ldpre(Rs,As)
#define Load_Rt_D(f)	Ldpre(Rt,As)
#define Loadzx_Rt_D(f)	Ldpre(Rt,As)
#define Load_Rd(f)	Ldword(Rd,As)
#define Load_Rs(f)	Ldword(Rs,As)
#define Load_Rt(f)	Ldword(Rt,As)
#define Loadzx_Rt(f)	Ldword(Rt,As)

/* from the destination, Ad; Fetch_Rd carries its own semicolon, unlike the others */
#define Fetch_Rd_P(f)	Ldpost(Rd,Ad)
#define Fetch_Rd_D(f)	Ldpre(Rd,Ad)
#define Fetch_Rd(f)	Ldword(Rd,Ad);

/* to the destination, Ad */
#define Store_Rs_P	Stpost(Rs,Ad)
#define Store_Rs_D	Stpre(Rs,Ad)
#define Store_Rs	Stword(Rs,Ad)

/* R0 = R0 | R0 */
#define Nop		*p++ = Inst3a(2,R0,OR,R0,0,0,R0)

/* point AT at the table t */
#define Inittab(t,s)	RConst(AT,((ulong)(t)))

/*
 * emit code to look up n bits at offset o; table entries are 1<<l bytes long
 * The field is taken from Rt and the table entry is left in Rd.
 *   1. move the field to bit position l: shift right by tmp = 32-(o+n)-l
 *      when that is non-negative, otherwise shift left by l, or just copy
 *      Rt when l is not positive either;
 *   2. AND with an n-bit mask shifted up by l, which leaves an offset
 *      that is a multiple of 1<<l;
 *   3. load from AT plus that offset with LDUB, LDUH or LD according to osiz.
 * Assigns tmp, reads osiz and advances p, all taken from the scope of the
 * caller.  The last line ends in a continuation, so the line following the
 * macro must stay blank.
 * NOTE(review): the left-shift case shifts by l rather than by -tmp, which
 * lines the field up only when (o)+(n) == 32 -- presumably the only such
 * case callers produce; confirm in bb.c.
 */
#define Table_RdRt(o,n,l)					\
	tmp = 32-((o)+(n))-(l);					\
	if(tmp >= 0)						\
		*p++ = Inst3a(2,Rd,SRL,Rt,1,0,tmp); 		\
	else if((l) > 0)					\
		*p++ = Inst3a(2,Rd,SLL,Rt,1,0,l); 		\
	else							\
		*p++ = Inst3a(2,Rd,ADD,Rt,0,0,R0);		\
	*p++ = Inst3b(2,Rd,AND,Rd,1,((1<<(n))-1)<<(l));		\
	if(osiz==1) *p++ = Inst3b(3,Rd,LDUB,Rd,0,AT);		\
	else if(osiz==2) *p++ = Inst3b(3,Rd,LDUH,Rd,0,AT);	\
	else *p++ = Inst3b(3,Rd,LD,Rd,0,AT);			\

/*
 * Emit code to look up n bits at offset o of Rt in the table at AT,
 * leaving the table entry in Rs; table entries are 1<<l bytes long.
 * This is Table_RdRt with Rs as the result register:
 *   1. move the field to bit position l (exactly one instruction:
 *      SRL by tmp when tmp >= 0, else SLL by l, else a plain copy);
 *   2. AND with an n-bit mask shifted up by l, so that only the field
 *      selects the table entry;
 *   3. load the entry with LDUB, LDUH or LD according to osiz.
 * Assigns tmp, reads osiz and advances p, all taken from the scope of
 * the caller.
 *
 * Earlier versions emitted an extra, unconditional SRL by tmp ahead of
 * the alignment step (a corrupt instruction whenever tmp was negative)
 * and left out the mask.
 */
#define Table_RsRt(o,n,l)					\
	tmp = 32-((o)+(n))-(l);					\
	if(tmp >= 0)						\
		*p++ = Inst3a(2,Rs,SRL,Rt,1,0,tmp); 		\
	else if((l) > 0)					\
		*p++ = Inst3a(2,Rs,SLL,Rt,1,0,l); 		\
	else							\
		*p++ = Inst3a(2,Rs,ADD,Rt,0,0,R0);		\
	*p++ = Inst3b(2,Rs,AND,Rs,1,((1<<(n))-1)<<(l));		\
	if(osiz==1) *p++ = Inst3b(3,Rs,LDUB,Rs,0,AT);		\
	else if(osiz==2) *p++ = Inst3b(3,Rs,LDUH,Rs,0,AT);	\
	else *p++ = Inst3b(3,Rs,LD,Rs,0,AT);

/*
 * Emit code to assemble the low nbits bits of Rd into offset off in Rs.
 *   off == 0		Rs = Rd << (32-nbits), replacing what Rs held
 *   off == 32-nbits	Rs |= Rd, no shift being needed
 *   otherwise		Rs |= Rd << (32-(off+nbits)), by way of RX
 */
#define Assemble(off,nbits) \
	if((off) == 0) { \
		Olsh_RsRd(32-(nbits)); \
	} else if((off) == 32-(nbits)) { \
		*p++ = Inst3a(2,Rs,OR,Rs,0,0,Rd); \
	} else { \
		Oorlsh_RsRd(32-((off)+(nbits))); \
	}

/*
 * Emit code to assemble the low nbits bits of Rd into Rs, piece by piece.
 * The first piece (off == 0) is copied into Rs; for each later one
 * Rs is shifted left by nbits and Rd is ORed in below it.
 * It only works if the whole word will eventually be filled.
 */
#define Assemblex(off,nbits) \
	if((off) == 0) { \
		*p++ = Inst3a(2,Rs,OR,Rd,0,0,R0); \
	} else { \
		*p++ = Inst3a(2,Rs,SLL,Rs,1,0,nbits); \
		*p++ = Inst3a(2,Rs,OR,Rd,0,0,Rs); \
	}

/*
 * Run the program assembled at memstart by calling it as a
 * void (*)(void) function, then, unless onstack is nonzero,
 * hand the memory back with bbfree.  The size passed to bbfree
 * is the distance from memstart to p (taken from the scope of
 * the caller) in bytes.
 * Both arguments are parenthesized so that expression arguments
 * (memstart+off, a|b, ...) bind as the caller wrote them;
 * memstart is evaluated more than once.
 * The expansion ends in a semicolon of its own.
 */
#define Execandfree(memstart,onstack)				\
	(*(void (*)(void))(memstart))();			\
	if(!(onstack))						\
		bbfree((memstart), (p-(memstart)) * sizeof(Type));

/*
 * Copy the two words at fi into the program at p, then advance p by
 * fin bytes.  Two words are always stored; when fin covers fewer than
 * two words, whatever is assembled next overwrites the surplus.
 * p, fi and fin come from the scope of the caller.
 * NOTE(review): fi and fin presumably are the instr and n of an fstr[]
 * entry -- confirm in bb.c.
 */
#define Emitop			\
	p[0] = fi[0];		\
	p[1] = fi[1];		\
	p = (Type*)(((char *)p)+fin)

/*
 * Code for one bitblt function.
 *	fetchs	set for exactly the functions below that use the source (Rs)
 *	fetchd	set for exactly the functions below that use the destination (Rd)
 *	n	4 for each instruction held in instr (0, 4 or 8 below)
 *	instr	up to two instruction words; unused slots are 0
 */
typedef struct	Fstr
{
	char	fetchs;
	char	fetchd;
	short	n;
	Type	instr[2];
} Fstr;

/*
 * One entry per function code, 0 through 15; the comment on each entry
 * names the function.  Every instruction in the table leaves its result
 * in Rs.  The two-instruction entries compute the un-negated result
 * first and then XOR it with RF (all ones) to complement it.
 * The initializer is written in the Plan 9 dialect ([i] without `=',
 * with the scalar members unbraced) and is left exactly as it was.
 */
Fstr	fstr[16] =
{
[0]	0,0,4,		/* Zero */
	{ Inst3a(2,Rs,ADD,R0,0,0,R0), 0 },

[1]	1,1,8,		/* DnorS */
	{ Inst3a(2,Rs,OR,Rs,0,0,Rd), Inst3a(2,Rs,XOR,Rs,0,0,RF) },

[2]	1,1,4,		/* DandnotS */
	{ Inst3a(2,Rs,ANDN,Rd,0,0,Rs), 0 },

[3]	1,0,4,		/* notS */
	{ Inst3a(2,Rs,XOR,Rs,0,0,RF), 0 },

[4]	1,1,4,		/* notDandS */
	{ Inst3a(2,Rs,ANDN,Rs,0,0,Rd), 0 },

[5]	0,1,4,		/* notD */
	{ Inst3a(2,Rs,XOR,Rd,0,0,RF), 0 },

[6]	1,1,4,		/* DxorS */
	{ Inst3a(2,Rs,XOR,Rd,0,0,Rs), 0 },

[7]	1,1,8,		/* DnandS */
	{ Inst3a(2,Rs,AND,Rd,0,0,Rs), Inst3a(2,Rs,XOR,Rs,0,0,RF) },

[8]	1,1,4,		/* DandS */
	{ Inst3a(2,Rs,AND,Rd,0,0,Rs), 0 },

[9]	1,1,8,		/* DxnorS */
	{ Inst3a(2,Rs,XOR,Rd,0,0,Rs), Inst3a(2,Rs,XOR,Rs,0,0,RF) },

[10]	0,1,4,		/* D */
	{ Inst3a(2,Rs,ADD,Rd,0,0,R0), 0 },

[11]	1,1,4,		/* DornotS */
	{ Inst3a(2,Rs,ORN,Rd,0,0,Rs), 0 },

[12]	1,0,0,		/* S */
	{0, 0},

[13]	1,1,4,		/* notDorS */
	{ Inst3a(2,Rs,ORN,Rs,0,0,Rd), 0 },

[14]	1,1,4,		/* DorS */
	{ Inst3a(2,Rs,OR,Rd,0,0,Rs), 0 },

[15]	0,0,4,		/* F */
	{ Inst3a(2,Rs,OR,R0,0,0,RF), 0 },
};

#include "tabs.h"
/*
 * The conversion tables of tabs.h, selected by two indices in 0..3.
 * There is no table where the two indices are equal.
 * NOTE(review): going by the tabXY names the indices presumably are
 * [source ldepth][destination ldepth] -- confirm in bb.c.
 */
static uchar *tabs[4][4] =
{
	{ 0,             (uchar*)tab01, (uchar*)tab02, (uchar*)tab03 },
	{ (uchar*)tab10, 0,             (uchar*)tab12, (uchar*)tab13 },
	{ (uchar*)tab20, (uchar*)tab21, 0,             (uchar*)tab23 },
	{ (uchar*)tab30, (uchar*)tab31, (uchar*)tab32, 0             },
};

/*
 * Size in bytes of entries; 0 where the two indices are equal.
 * NOTE(review): presumably indexed like tabs and the source of the
 * osiz read by Table_RdRt and Table_RsRt -- confirm in bb.c.
 */
static uchar tabosiz[4][4] =
{
	{0, 2, 4, 4},
	{1, 0, 2, 4},
	{1, 1, 0, 2},
	{1, 1, 1, 0},
};

/* limits on the size of a generated bitblt program */
enum
{
	/* max number of bytes in a bitblt prog */
	Progmax		= 1000,

	/* max number of Type units when no conversion */
	Progmaxnoconv	= 70,
};


#ifdef TEST
/*
 * Stand-in compiled only when TEST is defined.
 * It prints nothing: it just calls abort(), the note on that
 * line being to use db instead.
 */
void
prprog(void)
{
	abort();	/* use db */
}

#endif

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.