/* src/cpu/jit/compemu_optimizer_x86.c */


#if USE_LOW_OPTIMIZER
/* Welcome to the magical world of cpp ;-) */

/* This was broken by the advent of FPU emulation. It also didn't
   provide any useful speedup while it worked. *Sigh* Someone fix my
   software, please ;-) */

/* Capacity of the pending-instruction queue; low_advance() flushes the
   queue through lopt_emit_all() once this many entries are stored. */
#define MAXLOPTINST 100

/* Run-time switch tested by LENDFUNC: non-zero queues the operation for
   reordering, zero calls the do_* emitter directly. */
#define LDECISION currprefs.comp_lowopt

/* lopt_opN(tagged args) opens a call to lopt_store_opN. Each parameter
   has the form "TAG name"; prefixing it with LOPT_ turns it into
   "LOPT_TAG name", which expands to an access-type constant, a comma and
   the value. The expansion is deliberately left unterminated: LENDFUNC
   appends the emitter, mem and flags arguments and the closing
   parenthesis. */
#define lopt_op0(dummy) lopt_store_op0(
#define lopt_op1(a1) lopt_store_op1(LOPT_##a1,
#define lopt_op2(a1,a2) lopt_store_op2(LOPT_##a1,LOPT_##a2,
#define lopt_op3(a1,a2,a3) lopt_store_op3(LOPT_##a1,LOPT_##a2,LOPT_##a3,
#define lopt_op4(a1,a2,a3,a4) lopt_store_op4(LOPT_##a1,LOPT_##a2,LOPT_##a3,LOPT_##a4,
#define lopt_op5(a1,a2,a3,a4,a5) lopt_store_op5(LOPT_##a1,LOPT_##a2,LOPT_##a3,LOPT_##a4,LOPT_##a5,

/* ldirectN(tagged args) builds the argument list for a direct do_* call:
   each "TAG name" becomes "LDIR_TAG name", and since every LDIR_TAG
   expands to nothing only the parameter names remain. */
#define ldirect0(dummy) ()
#define ldirect1(a1) (LDIR_##a1)
#define ldirect2(a1,a2) (LDIR_##a1,LDIR_##a2)
#define ldirect3(a1,a2,a3) (LDIR_##a1,LDIR_##a2,LDIR_##a3)
#define ldirect4(a1,a2,a3,a4) (LDIR_##a1,LDIR_##a2,LDIR_##a3,LDIR_##a4)
#define ldirect5(a1,a2,a3,a4,a5) (LDIR_##a1,LDIR_##a2,LDIR_##a3,LDIR_##a4,LDIR_##a5)

/* Access-mode bits. They are used both in per-argument type constants
   and in the per-instruction mem/flags fields. */
#define NONE 0
#define READ 1
#define WRITE 2
#define RMW (READ|WRITE)

/* Operand-class bits. SIZE1/SIZE2/SIZE4 share the two bits selected by
   SIZEMASK (presumably 1-, 2- and 4-byte operand widths -- confirm);
   FLOAT marks a floating-point operand. */
#define SIZE1 4
#define SIZE2 8
#define SIZE4 12
#define FLOAT 16
#define SIZEMASK 12

/* Per-argument access-type constants: L<mode><size>.
   Immediates and memory operands carry no register-access information
   (memory effects are recorded separately per instruction), so they map
   to NONE. */
#define LIMM NONE
#define LR1  (READ | SIZE1)
#define LR2  (READ | SIZE2)
#define LR4  (READ | SIZE4)
#define LW1  (WRITE | SIZE1)
#define LW2  (WRITE | SIZE2)
#define LW4  (WRITE | SIZE4)
#define LRW1 (RMW | SIZE1)
#define LRW2 (RMW | SIZE2)
#define LRW4 (RMW | SIZE4)
/* Float operands. LFR is a read and LFW a write, matching the LR and LW
   integer constants above; the two definitions used to be swapped. */
#define LFR  (READ | FLOAT)
#define LFW  (WRITE | FLOAT)
#define LFRW (RMW | FLOAT)
#define LMEMR NONE
#define LMEMW NONE
#define LMEMRW NONE

/* Tag expansion for the queued path. Each LOPT_<tag> expands to the
   matching access-type constant followed by a comma, so that the token
   sequence "LOPT_<tag> name" produced by lopt_opN becomes the
   "type, value" argument pair taken by lopt_store_opN. */
#define LOPT_IMM LIMM,
#define LOPT_R1  LR1 ,
#define LOPT_R2  LR2 ,
#define LOPT_R4  LR4 ,
#define LOPT_W1  LW1 ,
#define LOPT_W2  LW2 ,
#define LOPT_W4  LW4 ,
#define LOPT_RW1 LRW1,
#define LOPT_RW2 LRW2,
#define LOPT_RW4 LRW4,
#define LOPT_FR  LFR,
#define LOPT_FW  LFW,
#define LOPT_FRW LFRW,
#define LOPT_MEMR LMEMR,
#define LOPT_MEMW LMEMW,
#define LOPT_MEMRW LMEMRW,

/* Tag expansion for the direct path. Each LDIR_<tag> expands to nothing,
   so the token sequence "LDIR_<tag> name" produced by ldirectN reduces to
   the bare argument name. */
#define LDIR_IMM
#define LDIR_R1
#define LDIR_R2
#define LDIR_R4
#define LDIR_W1
#define LDIR_W2
#define LDIR_W4
#define LDIR_RW1
#define LDIR_RW2
#define LDIR_RW4
#define LDIR_FW
#define LDIR_FR
#define LDIR_FRW
#define LDIR_MEMR
#define LDIR_MEMW
#define LDIR_MEMRW


/* Replace any earlier LOWFUNC/LENDFUNC definitions with the optimizer-aware
   versions below. */
#undef LOWFUNC
#undef LENDFUNC

/* LOWFUNC opens the definition of the real emitter under the name
   do_<func>; the function body follows the macro invocation. The flags,
   mem and nargs parameters are not used by this macro. */
#define LOWFUNC(flags,mem,nargs,func,args) \
  STATIC_INLINE void do_##func args

/* LENDFUNC defines the public wrapper <func> with the same parameter list.
   When LDECISION is non-zero the call is recorded through
   lopt_store_op<nargs>, passing the per-argument access types and values
   followed by the emitter do_<func>, mem and flags. Otherwise do_<func> is
   called directly with the plain argument names.

   args is joined to lopt_op<nargs> / ldirect<nargs> with ##. Pasting an
   identifier onto "(" does not form a single valid preprocessing token,
   which ISO C leaves undefined and some compilers reject.
   NOTE(review): presumably the ## is there so the tag names inside args
   reach lopt_opN/ldirectN without being macro-expanded first -- confirm
   before replacing it with a plain "lopt_op##nargs args". */
#define LENDFUNC(flags,mem,nargs,func,args) \
  STATIC_INLINE void func args \
  { \
  if (LDECISION) { \
    lopt_op##nargs##args do_##func, mem, flags); \
  } else { \
    do_##func ldirect##nargs##args; \
  } \
  }

/* One queued low-level operation waiting to be emitted. */
typedef struct lopt_inst_rec {
    void* func;         /* do_* emitter; cast to lopt_handlerN in lopt_emit_all() */
    uae_u32 args[5];    /* argument values; only the first nargs are meaningful */
    uae_u8 argtype[5];  /* access-type constant (LR4, LW1, ...) per argument */
    uae_s8 nargs;       /* number of arguments stored, 0..5 */
    uae_u8 mem;         /* READ/WRITE bits describing the memory access */
    uae_u8 flags;       /* READ/WRITE bits describing the flag access */
} lopt_inst;



/* Queue of operations recorded since the last lopt_emit_all(). */
static lopt_inst linst[MAXLOPTINST];
/* Number of valid entries in linst, which is also the next free slot. */
static int lopt_index=0;

/* Return only the size bits (those selected by SIZEMASK) of an
   access-type constant. */
STATIC_INLINE int argsize(int type)
{
    int size_bits = type;

    size_bits &= SIZEMASK;
    return size_bits;
}

/* Non-zero when queued instruction i has READ set in its mem field. */
STATIC_INLINE int reads_mem(int i)
{
    int mem_access = linst[i].mem;

    return mem_access & READ;
}


/* Return 1 if queued instruction i has a non-float argument whose value
   equals r and whose access type shares at least one bit with mode;
   return 0 otherwise. */
STATIC_INLINE int access_reg(int i, int r, int mode)
{
    int count = linst[i].nargs;
    int slot = 0;

    while (slot < count) {
        int type = linst[i].argtype[slot];

        /* Float operands never count as integer register accesses. */
        if (!(type & FLOAT) && (type & mode) && linst[i].args[slot] == r) {
            return 1;
        }
        slot++;
    }
    return 0;
}

/* Return 1 if queued instruction i writes integer register r (an argument
   equal to r whose access type has the WRITE bit), 0 otherwise.
   Uses STATIC_INLINE like the sibling helpers; the former spelling
   STATIC_INLINE_ matched no macro used anywhere else in this file. */
STATIC_INLINE int writes_reg(int i, int r)
{
    return access_reg(i,r,WRITE);
}

/* Return 1 if queued instruction i reads integer register r (an argument
   equal to r whose access type has the READ bit), 0 otherwise. */
STATIC_INLINE int reads_reg(int i, int r)
{
    int hit = access_reg(i, r, READ);

    return hit;
}

/* Return 1 if queued instruction i reads or writes integer register r
   (an argument equal to r with READ and/or WRITE set), 0 otherwise. */
STATIC_INLINE int uses_reg(int i, int r)
{
    int hit = access_reg(i, r, RMW);

    return hit;
}


/* Non-zero when queued instruction i has WRITE set in its mem field. */
STATIC_INLINE int writes_mem(int i)
{
    int mem_access = linst[i].mem;

    return mem_access & WRITE;
}

/* Non-zero when queued instruction i has READ and/or WRITE set in its
   mem field. */
STATIC_INLINE int uses_mem(int i)
{
    int mem_access = linst[i].mem;

    return mem_access & RMW;
}

/* Non-zero when queued instruction i has READ set in its flags field. */
STATIC_INLINE int reads_flags(int i)
{
    int flag_access = linst[i].flags;

    return flag_access & READ;
}

/* Non-zero when queued instruction i has WRITE set in its flags field. */
STATIC_INLINE int writes_flags(int i)
{
    int flag_access = linst[i].flags;

    return flag_access & WRITE;
}

/* Non-zero when queued instruction i has READ and/or WRITE set in its
   flags field. */
STATIC_INLINE int uses_flags(int i)
{
    int flag_access = linst[i].flags;

    return flag_access & RMW;
}

/* Prototypes for two do_* emitters whose definitions are not in this
   section. do_raw_fflags_save is compared against linst[].func in
   depends_on(). */
static void do_raw_mov_l_rm(W4,MEMR);
static void do_raw_fflags_save(void);


/* Whether i depends on j.
   Returns 1 when queued instructions i and j must keep their relative
   order, 0 when no conflict was found. */
STATIC_INLINE int depends_on(int i, int j)
{
    int argc_i = linst[i].nargs;
    int argc_j = linst[j].nargs;
    int a;

    /* Memory: i writes what j touches, or i reads what j writes. */
    if ((writes_mem(i) && uses_mem(j)) || (reads_mem(i) && writes_mem(j))) {
        return 1;
    }

    /* Flags: same two hazard shapes as for memory. */
    if ((writes_flags(i) && uses_flags(j)) ||
        (reads_flags(i) && writes_flags(j))) {
        return 1;
    }

    /* A floating-point argument on either side always counts as a
       dependency. */
    for (a = 0; a < argc_i; a++) {
        if (linst[i].argtype[a] & FLOAT) {
            return 1;
        }
    }
    for (a = 0; a < argc_j; a++) {
        if (linst[j].argtype[a] & FLOAT) {
            return 1;
        }
    }

    /* Integer registers: anything i writes must not be used by j, and
       anything i only reads must not be written by j. */
    for (a = 0; a < argc_i; a++) {
        int type = linst[i].argtype[a];

        if (type & FLOAT) {
            continue;
        }
        if (type & WRITE) {
            if (uses_reg(j, linst[i].args[a])) {
                return 1;
            }
        } else if (type & READ) {
            if (writes_reg(j, linst[i].args[a])) {
                return 1;
            }
        }
    }

    /* The need for this indicates a problem somewhere in the
       LOWFUNC definitions --- I think. FIXME! */
    if (uses_flags(i) && uses_flags(j)) {
        return 1;
    }
    if (linst[i].func == do_raw_fflags_save ||
        linst[j].func == do_raw_fflags_save) {
        return 1;
    }

    return 0;
}

/* Second prototype of do_raw_mov_l_rm (it is already declared before
   depends_on), this time with its parameters named. */
static void do_raw_mov_l_rm(W4 d, MEMR s);

/* Reorder the queue in place: every queued instruction that touches
   memory is hoisted up to four slots earlier, stopping at the first
   predecessor it depends on (see depends_on). The instructions it passes
   are shifted one slot later. */
STATIC_INLINE void low_peephole(void)
{
    int cur;

    for (cur = 0; cur < lopt_index; cur++) {
        lopt_inst moved;
        int dest;
        int pos;

        if (!uses_mem(cur)) {
            continue;
        }

        /* dest is the earliest slot the instruction may occupy; walk it
           backwards while the predecessor is in range and independent. */
        dest = cur;
        while (dest - 1 >= cur - 4 && dest - 1 >= 0 &&
               !depends_on(cur, dest - 1)) {
            dest--;
        }
        if (dest == cur) {
            continue;
        }

        /* Move linst[cur] to dest, shifting linst[dest..cur-1] up by one. */
        moved = linst[cur];
        for (pos = cur; pos > dest; pos--) {
            linst[pos] = linst[pos - 1];
        }
        linst[dest] = moved;
    }
}


typedef void lopt_handler0(void);
typedef void lopt_handler1(uae_u32);
typedef void lopt_handler2(uae_u32,uae_u32);
typedef void lopt_handler3(uae_u32,uae_u32,uae_u32);
typedef void lopt_handler4(uae_u32,uae_u32,uae_u32,uae_u32);
typedef void lopt_handler5(uae_u32,uae_u32,uae_u32,uae_u32,uae_u32);

static void lopt_emit_all(void)
{
    int i;
    lopt_inst* x;
    static int inemit=0;

    if (inemit) {
	printf("WARNING: lopt_emit is not reentrant!\n");
    }
    inemit=1;

    low_peephole();

    for (i=0;i<lopt_index;i++) {
	x=linst+i;
	switch(x->nargs) {
	 case 0: ((lopt_handler0*)x->func)(); break;
	 case 1: ((lopt_handler1*)x->func)(x->args[0]); break;
	 case 2: ((lopt_handler2*)x->func)(x->args[0],x->args[1]); break;
	 case 3: ((lopt_handler3*)x->func)(x->args[0],x->args[1],x->args[2]); break;
	 case 4: ((lopt_handler4*)x->func)(x->args[0],x->args[1],x->args[2],
					   x->args[3]); break;
	 case 5: ((lopt_handler5*)x->func)(x->args[0],x->args[1],x->args[2],
					   x->args[3],x->args[4]); break;
	 default: abort();
	}
    }
    lopt_index=0;
    inemit=0;
}

/* Commit the entry just written at linst[lopt_index] and flush the whole
   queue as soon as it holds MAXLOPTINST entries. */
STATIC_INLINE void low_advance(void)
{
  if (++lopt_index != MAXLOPTINST) {
    return;
  }
  lopt_emit_all();
}

/* Queue an emitter that takes no arguments.
   lfuncptr is the do_* emitter; lmem and lflags are its memory and flag
   access bits. */
STATIC_INLINE void lopt_store_op0(void* lfuncptr, uae_u32 lmem,
                                  uae_u32 lflags)
{
  const int slot = lopt_index;

  linst[slot].nargs = 0;
  linst[slot].flags = lflags;
  linst[slot].mem = lmem;
  linst[slot].func = lfuncptr;

  low_advance();
}

/* Queue an emitter that takes one argument.
   t1/a1 are the argument's access type and value; lfuncptr is the do_*
   emitter; lmem and lflags are its memory and flag access bits. */
STATIC_INLINE void lopt_store_op1(uae_u8 t1, uae_u32 a1,
                                  void* lfuncptr, uae_u32 lmem,
                                  uae_u32 lflags)
{
  const int slot = lopt_index;

  /* Operand first ... */
  linst[slot].args[0] = a1;
  linst[slot].argtype[0] = t1;

  /* ... then the per-instruction bookkeeping. */
  linst[slot].nargs = 1;
  linst[slot].flags = lflags;
  linst[slot].mem = lmem;
  linst[slot].func = lfuncptr;

  low_advance();
}

/* Queue an emitter that takes two arguments.
   tN/aN are each argument's access type and value; lfuncptr is the do_*
   emitter; lmem and lflags are its memory and flag access bits. */
STATIC_INLINE void lopt_store_op2(uae_u8 t1, uae_u32 a1,
                                  uae_u8 t2, uae_u32 a2,
                                  void* lfuncptr, uae_u32 lmem,
                                  uae_u32 lflags)
{
  const int slot = lopt_index;

  /* Operand values ... */
  linst[slot].args[0] = a1;
  linst[slot].args[1] = a2;

  /* ... their access types ... */
  linst[slot].argtype[0] = t1;
  linst[slot].argtype[1] = t2;

  /* ... then the per-instruction bookkeeping. */
  linst[slot].nargs = 2;
  linst[slot].flags = lflags;
  linst[slot].mem = lmem;
  linst[slot].func = lfuncptr;

  low_advance();
}

/* Queue an emitter that takes three arguments.
   tN/aN are each argument's access type and value; lfuncptr is the do_*
   emitter; lmem and lflags are its memory and flag access bits. */
STATIC_INLINE void lopt_store_op3(uae_u8 t1, uae_u32 a1,
                                  uae_u8 t2, uae_u32 a2,
                                  uae_u8 t3, uae_u32 a3,
                                  void* lfuncptr, uae_u32 lmem,
                                  uae_u32 lflags)
{
  const int slot = lopt_index;

  /* Operand values ... */
  linst[slot].args[0] = a1;
  linst[slot].args[1] = a2;
  linst[slot].args[2] = a3;

  /* ... their access types ... */
  linst[slot].argtype[0] = t1;
  linst[slot].argtype[1] = t2;
  linst[slot].argtype[2] = t3;

  /* ... then the per-instruction bookkeeping. */
  linst[slot].nargs = 3;
  linst[slot].flags = lflags;
  linst[slot].mem = lmem;
  linst[slot].func = lfuncptr;

  low_advance();
}

/* Queue an emitter that takes four arguments.
   tN/aN are each argument's access type and value; lfuncptr is the do_*
   emitter; lmem and lflags are its memory and flag access bits. */
STATIC_INLINE void lopt_store_op4(uae_u8 t1, uae_u32 a1,
                                  uae_u8 t2, uae_u32 a2,
                                  uae_u8 t3, uae_u32 a3,
                                  uae_u8 t4, uae_u32 a4,
                                  void* lfuncptr, uae_u32 lmem,
                                  uae_u32 lflags)
{
  const int slot = lopt_index;

  /* Operand values ... */
  linst[slot].args[0] = a1;
  linst[slot].args[1] = a2;
  linst[slot].args[2] = a3;
  linst[slot].args[3] = a4;

  /* ... their access types ... */
  linst[slot].argtype[0] = t1;
  linst[slot].argtype[1] = t2;
  linst[slot].argtype[2] = t3;
  linst[slot].argtype[3] = t4;

  /* ... then the per-instruction bookkeeping. */
  linst[slot].nargs = 4;
  linst[slot].flags = lflags;
  linst[slot].mem = lmem;
  linst[slot].func = lfuncptr;

  low_advance();
}

/* Queue an emitter that takes five arguments.
   tN/aN are each argument's access type and value; lfuncptr is the do_*
   emitter; lmem and lflags are its memory and flag access bits. */
STATIC_INLINE void lopt_store_op5(uae_u8 t1, uae_u32 a1,
                                  uae_u8 t2, uae_u32 a2,
                                  uae_u8 t3, uae_u32 a3,
                                  uae_u8 t4, uae_u32 a4,
                                  uae_u8 t5, uae_u32 a5,
                                  void* lfuncptr, uae_u32 lmem,
                                  uae_u32 lflags)
{
  const int slot = lopt_index;

  /* Operand values ... */
  linst[slot].args[0] = a1;
  linst[slot].args[1] = a2;
  linst[slot].args[2] = a3;
  linst[slot].args[3] = a4;
  linst[slot].args[4] = a5;

  /* ... their access types ... */
  linst[slot].argtype[0] = t1;
  linst[slot].argtype[1] = t2;
  linst[slot].argtype[2] = t3;
  linst[slot].argtype[3] = t4;
  linst[slot].argtype[4] = t5;

  /* ... then the per-instruction bookkeeping. */
  linst[slot].nargs = 5;
  linst[slot].flags = lflags;
  linst[slot].mem = lmem;
  linst[slot].func = lfuncptr;

  low_advance();
}

/* Flush every queued operation now; simply forwards to lopt_emit_all(). */
STATIC_INLINE void empty_low_optimizer(void)
{
  lopt_emit_all();
}

#else
/* Low-level optimizer compiled out: both flush entry points expand to
   nothing. */
#define lopt_emit_all()
#define empty_low_optimizer()
#endif