--- qemu/tcg/arm/tcg-target.c 2018/04/24 18:25:16 1.1.1.5 +++ qemu/tcg/arm/tcg-target.c 2018/04/24 19:35:48 1.1.1.9 @@ -113,12 +113,25 @@ static const int tcg_target_call_oarg_re TCG_REG_R0, TCG_REG_R1 }; +static inline void reloc_abs32(void *code_ptr, tcg_target_long target) +{ + *(uint32_t *) code_ptr = target; +} + +static inline void reloc_pc24(void *code_ptr, tcg_target_long target) +{ + uint32_t offset = ((target - ((tcg_target_long) code_ptr + 8)) >> 2); + + *(uint32_t *) code_ptr = ((*(uint32_t *) code_ptr) & ~0xffffff) + | (offset & 0xffffff); +} + static void patch_reloc(uint8_t *code_ptr, int type, tcg_target_long value, tcg_target_long addend) { switch (type) { case R_ARM_ABS32: - *(uint32_t *) code_ptr = value; + reloc_abs32(code_ptr, value); break; case R_ARM_CALL: @@ -127,8 +140,7 @@ static void patch_reloc(uint8_t *code_pt tcg_abort(); case R_ARM_PC24: - *(uint32_t *) code_ptr = ((*(uint32_t *) code_ptr) & 0xff000000) | - (((value - ((tcg_target_long) code_ptr + 8)) >> 2) & 0xffffff); + reloc_pc24(code_ptr, value); break; } } @@ -340,6 +352,9 @@ static inline void tcg_out_b(TCGContext static inline void tcg_out_b_noaddr(TCGContext *s, int cond) { + /* We pay attention here to not modify the branch target by skipping + the corresponding bytes. This ensure that caches and memory are + kept coherent during retranslation. */ #ifdef HOST_WORDS_BIGENDIAN tcg_out8(s, (cond << 4) | 0x0a); s->code_ptr += 3; @@ -360,6 +375,12 @@ static inline void tcg_out_blx(TCGContex tcg_out32(s, (cond << 28) | 0x012fff30 | rn); } +static inline void tcg_out_blx_imm(TCGContext *s, int32_t offset) +{ + tcg_out32(s, 0xfa000000 | ((offset & 2) << 23) | + (((offset - 8) >> 2) & 0x00ffffff)); +} + static inline void tcg_out_dat_reg(TCGContext *s, int cond, int opc, int rd, int rn, int rm, int shift) { @@ -394,35 +415,38 @@ static inline void tcg_out_dat_imm(TCGCo } static inline void tcg_out_movi32(TCGContext *s, - int cond, int rd, int32_t arg) + int cond, int rd, uint32_t arg) { /* TODO: This is very suboptimal, we can easily have a constant * pool somewhere after all the instructions. */ - - if (arg < 0 && arg > -0x100) - return tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0, (~arg) & 0xff); - - if (use_armv7_instructions) { + if ((int)arg < 0 && (int)arg >= -0x100) { + tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0, (~arg) & 0xff); + } else if (use_armv7_instructions) { /* use movw/movt */ /* movw */ tcg_out32(s, (cond << 28) | 0x03000000 | (rd << 12) | ((arg << 4) & 0x000f0000) | (arg & 0xfff)); - if (arg & 0xffff0000) + if (arg & 0xffff0000) { /* movt */ tcg_out32(s, (cond << 28) | 0x03400000 | (rd << 12) | ((arg >> 12) & 0x000f0000) | ((arg >> 16) & 0xfff)); - } else { - tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0, arg & 0xff); - if (arg & 0x0000ff00) - tcg_out_dat_imm(s, cond, ARITH_ORR, rd, rd, - ((arg >> 8) & 0xff) | 0xc00); - if (arg & 0x00ff0000) - tcg_out_dat_imm(s, cond, ARITH_ORR, rd, rd, - ((arg >> 16) & 0xff) | 0x800); - if (arg & 0xff000000) - tcg_out_dat_imm(s, cond, ARITH_ORR, rd, rd, - ((arg >> 24) & 0xff) | 0x400); } + } else { + int opc = ARITH_MOV; + int rn = 0; + + do { + int i, rot; + + i = ctz32(arg) & ~1; + rot = ((32 - i) << 7) & 0xf00; + tcg_out_dat_imm(s, cond, opc, rd, rn, ((arg >> i) & 0xff) | rot); + arg &= ~(0xff << i); + + opc = ARITH_ORR; + rn = rd; + } while (arg); + } } static inline void tcg_out_mul32(TCGContext *s, @@ -818,52 +842,58 @@ static inline void tcg_out_st8(TCGContex tcg_out_st8_12(s, cond, rd, rn, offset); } +/* The _goto case is normally between TBs within the same code buffer, + * and with the code buffer limited to 16MB we shouldn't need the long + * case. + * + * .... except to the prologue that is in its own buffer. + */ static inline void tcg_out_goto(TCGContext *s, int cond, uint32_t addr) { int32_t val; + if (addr & 1) { + /* goto to a Thumb destination isn't supported */ + tcg_abort(); + } + val = addr - (tcg_target_long) s->code_ptr; if (val - 8 < 0x01fffffd && val - 8 > -0x01fffffd) tcg_out_b(s, cond, val); else { -#if 1 - tcg_abort(); -#else if (cond == COND_AL) { tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4); - tcg_out32(s, addr); /* XXX: This is l->u.value, can we use it? */ + tcg_out32(s, addr); } else { tcg_out_movi32(s, cond, TCG_REG_R8, val - 8); tcg_out_dat_reg(s, cond, ARITH_ADD, TCG_REG_PC, TCG_REG_PC, TCG_REG_R8, SHIFT_IMM_LSL(0)); } -#endif } } -static inline void tcg_out_call(TCGContext *s, int cond, uint32_t addr) +/* The call case is mostly used for helpers - so it's not unreasonable + * for them to be beyond branch range */ +static inline void tcg_out_call(TCGContext *s, uint32_t addr) { int32_t val; val = addr - (tcg_target_long) s->code_ptr; - if (val < 0x01fffffd && val > -0x01fffffd) - tcg_out_bl(s, cond, val); - else { -#if 1 - tcg_abort(); -#else - if (cond == COND_AL) { - tcg_out_dat_imm(s, cond, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 4); - tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4); - tcg_out32(s, addr); /* XXX: This is l->u.value, can we use it? */ + if (val - 8 < 0x02000000 && val - 8 >= -0x02000000) { + if (addr & 1) { + /* Use BLX if the target is in Thumb mode */ + if (!use_armv5_instructions) { + tcg_abort(); + } + tcg_out_blx_imm(s, val); } else { - tcg_out_movi32(s, cond, TCG_REG_R9, addr); - tcg_out_dat_reg(s, cond, ARITH_MOV, TCG_REG_R14, 0, - TCG_REG_PC, SHIFT_IMM_LSL(0)); - tcg_out_bx(s, cond, TCG_REG_R9); + tcg_out_bl(s, COND_AL, val); } -#endif + } else { + tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 4); + tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4); + tcg_out32(s, addr); } } @@ -899,6 +929,27 @@ static inline void tcg_out_goto_label(TC #include "../../softmmu_defs.h" +#ifdef CONFIG_TCG_PASS_AREG0 +/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, + int mmu_idx) */ +static const void * const qemu_ld_helpers[4] = { + helper_ldb_mmu, + helper_ldw_mmu, + helper_ldl_mmu, + helper_ldq_mmu, +}; + +/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr, + uintxx_t val, int mmu_idx) */ +static const void * const qemu_st_helpers[4] = { + helper_stb_mmu, + helper_stw_mmu, + helper_stl_mmu, + helper_stq_mmu, +}; +#else +/* legacy helper signature: __ld_mmu(target_ulong addr, int + mmu_idx) */ static void *qemu_ld_helpers[4] = { __ldb_mmu, __ldw_mmu, @@ -906,6 +957,8 @@ static void *qemu_ld_helpers[4] = { __ldq_mmu, }; +/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val, + int mmu_idx) */ static void *qemu_st_helpers[4] = { __stb_mmu, __stw_mmu, @@ -913,6 +966,7 @@ static void *qemu_st_helpers[4] = { __stq_mmu, }; #endif +#endif #define TLB_SHIFT (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS) @@ -960,10 +1014,10 @@ static inline void tcg_out_qemu_ld(TCGCo tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_AREG0, TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS)); /* In the - * ldr r1 [r0, #(offsetof(CPUState, tlb_table[mem_index][0].addr_read))] + * ldr r1 [r0, #(offsetof(CPUArchState, tlb_table[mem_index][0].addr_read))] * below, the offset is likely to exceed 12 bits if mem_index != 0 and * not exceed otherwise, so use an - * add r0, r0, #(mem_index * sizeof *CPUState.tlb_table) + * add r0, r0, #(mem_index * sizeof *CPUArchState.tlb_table) * before. */ if (mem_index) @@ -971,7 +1025,7 @@ static inline void tcg_out_qemu_ld(TCGCo (mem_index << (TLB_SHIFT & 1)) | ((16 - (TLB_SHIFT >> 1)) << 8)); tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R0, - offsetof(CPUState, tlb_table[0][0].addr_read)); + offsetof(CPUArchState, tlb_table[0][0].addr_read)); tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R1, TCG_REG_R8, SHIFT_IMM_LSL(TARGET_PAGE_BITS)); /* Check alignment. */ @@ -982,12 +1036,12 @@ static inline void tcg_out_qemu_ld(TCGCo /* XXX: possibly we could use a block data load or writeback in * the first access. */ tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0, - offsetof(CPUState, tlb_table[0][0].addr_read) + 4); + offsetof(CPUArchState, tlb_table[0][0].addr_read) + 4); tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R1, addr_reg2, SHIFT_IMM_LSL(0)); # endif tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0, - offsetof(CPUState, tlb_table[0][0].addend)); + offsetof(CPUArchState, tlb_table[0][0].addend)); switch (opc) { case 0: @@ -1031,7 +1085,7 @@ static inline void tcg_out_qemu_ld(TCGCo } label_ptr = (void *) s->code_ptr; - tcg_out_b(s, COND_EQ, 8); + tcg_out_b_noaddr(s, COND_EQ); /* TODO: move this code to where the constants pool will be */ if (addr_reg != TCG_REG_R0) { @@ -1045,8 +1099,20 @@ static inline void tcg_out_qemu_ld(TCGCo TCG_REG_R1, 0, addr_reg2, SHIFT_IMM_LSL(0)); tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R2, 0, mem_index); # endif - tcg_out_bl(s, COND_AL, (tcg_target_long) qemu_ld_helpers[s_bits] - - (tcg_target_long) s->code_ptr); +#ifdef CONFIG_TCG_PASS_AREG0 + /* XXX/FIXME: suboptimal and incorrect for 64 bit */ + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, + tcg_target_call_iarg_regs[2], 0, + tcg_target_call_iarg_regs[1], SHIFT_IMM_LSL(0)); + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, + tcg_target_call_iarg_regs[1], 0, + tcg_target_call_iarg_regs[0], SHIFT_IMM_LSL(0)); + + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, + tcg_target_call_iarg_regs[0], 0, TCG_AREG0, + SHIFT_IMM_LSL(0)); +#endif + tcg_out_call(s, (tcg_target_long) qemu_ld_helpers[s_bits]); switch (opc) { case 0 | 4: @@ -1076,7 +1142,7 @@ static inline void tcg_out_qemu_ld(TCGCo break; } - *label_ptr += ((void *) s->code_ptr - (void *) label_ptr - 8) >> 2; + reloc_pc24(label_ptr, (tcg_target_long)s->code_ptr); #else /* !CONFIG_SOFTMMU */ if (GUEST_BASE) { uint32_t offset = GUEST_BASE; @@ -1181,10 +1247,10 @@ static inline void tcg_out_qemu_st(TCGCo tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_AREG0, TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS)); /* In the - * ldr r1 [r0, #(offsetof(CPUState, tlb_table[mem_index][0].addr_write))] + * ldr r1 [r0, #(offsetof(CPUArchState, tlb_table[mem_index][0].addr_write))] * below, the offset is likely to exceed 12 bits if mem_index != 0 and * not exceed otherwise, so use an - * add r0, r0, #(mem_index * sizeof *CPUState.tlb_table) + * add r0, r0, #(mem_index * sizeof *CPUArchState.tlb_table) * before. */ if (mem_index) @@ -1192,7 +1258,7 @@ static inline void tcg_out_qemu_st(TCGCo (mem_index << (TLB_SHIFT & 1)) | ((16 - (TLB_SHIFT >> 1)) << 8)); tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R0, - offsetof(CPUState, tlb_table[0][0].addr_write)); + offsetof(CPUArchState, tlb_table[0][0].addr_write)); tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R1, TCG_REG_R8, SHIFT_IMM_LSL(TARGET_PAGE_BITS)); /* Check alignment. */ @@ -1203,12 +1269,12 @@ static inline void tcg_out_qemu_st(TCGCo /* XXX: possibly we could use a block data load or writeback in * the first access. */ tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0, - offsetof(CPUState, tlb_table[0][0].addr_write) + 4); + offsetof(CPUArchState, tlb_table[0][0].addr_write) + 4); tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R1, addr_reg2, SHIFT_IMM_LSL(0)); # endif tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0, - offsetof(CPUState, tlb_table[0][0].addend)); + offsetof(CPUArchState, tlb_table[0][0].addend)); switch (opc) { case 0: @@ -1236,7 +1302,7 @@ static inline void tcg_out_qemu_st(TCGCo tcg_out_bswap32(s, COND_EQ, TCG_REG_R0, data_reg2); tcg_out_st32_rwb(s, COND_EQ, TCG_REG_R0, TCG_REG_R1, addr_reg); tcg_out_bswap32(s, COND_EQ, TCG_REG_R0, data_reg); - tcg_out_st32_12(s, COND_EQ, data_reg, TCG_REG_R1, 4); + tcg_out_st32_12(s, COND_EQ, TCG_REG_R0, TCG_REG_R1, 4); } else { tcg_out_st32_rwb(s, COND_EQ, data_reg, TCG_REG_R1, addr_reg); tcg_out_st32_12(s, COND_EQ, data_reg2, TCG_REG_R1, 4); @@ -1245,7 +1311,7 @@ static inline void tcg_out_qemu_st(TCGCo } label_ptr = (void *) s->code_ptr; - tcg_out_b(s, COND_EQ, 8); + tcg_out_b_noaddr(s, COND_EQ); /* TODO: move this code to where the constants pool will be */ tcg_out_dat_reg(s, COND_AL, ARITH_MOV, @@ -1312,12 +1378,27 @@ static inline void tcg_out_qemu_st(TCGCo } # endif - tcg_out_bl(s, COND_AL, (tcg_target_long) qemu_st_helpers[s_bits] - - (tcg_target_long) s->code_ptr); +#ifdef CONFIG_TCG_PASS_AREG0 + /* XXX/FIXME: suboptimal and incorrect for 64 bit */ + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, + tcg_target_call_iarg_regs[3], 0, + tcg_target_call_iarg_regs[2], SHIFT_IMM_LSL(0)); + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, + tcg_target_call_iarg_regs[2], 0, + tcg_target_call_iarg_regs[1], SHIFT_IMM_LSL(0)); + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, + tcg_target_call_iarg_regs[1], 0, + tcg_target_call_iarg_regs[0], SHIFT_IMM_LSL(0)); + + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, + tcg_target_call_iarg_regs[0], 0, TCG_AREG0, + SHIFT_IMM_LSL(0)); +#endif + tcg_out_call(s, (tcg_target_long) qemu_st_helpers[s_bits]); if (opc == 3) tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R13, TCG_REG_R13, 0x10); - *label_ptr += ((void *) s->code_ptr - (void *) label_ptr - 8) >> 2; + reloc_pc24(label_ptr, (tcg_target_long)s->code_ptr); #else /* !CONFIG_SOFTMMU */ if (GUEST_BASE) { uint32_t offset = GUEST_BASE; @@ -1399,7 +1480,7 @@ static inline void tcg_out_op(TCGContext /* Direct jump method */ #if defined(USE_DIRECT_JUMP) s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf; - tcg_out_b(s, COND_AL, 8); + tcg_out_b_noaddr(s, COND_AL); #else tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4); s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf; @@ -1425,7 +1506,7 @@ static inline void tcg_out_op(TCGContext break; case INDEX_op_call: if (const_args[0]) - tcg_out_call(s, COND_AL, args[0]); + tcg_out_call(s, args[0]); else tcg_out_callr(s, COND_AL, args[0]); break; @@ -1769,57 +1850,48 @@ static void tcg_target_init(TCGContext * tcg_regset_set_reg(s->reserved_regs, TCG_REG_PC); tcg_add_target_add_op_defs(arm_op_defs); + tcg_set_frame(s, TCG_AREG0, offsetof(CPUArchState, temp_buf), + CPU_TEMP_BUF_NLONGS * sizeof(long)); } -static inline void tcg_out_ld(TCGContext *s, TCGType type, int arg, - int arg1, tcg_target_long arg2) +static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg, + TCGReg arg1, tcg_target_long arg2) { tcg_out_ld32u(s, COND_AL, arg, arg1, arg2); } -static inline void tcg_out_st(TCGContext *s, TCGType type, int arg, - int arg1, tcg_target_long arg2) +static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, + TCGReg arg1, tcg_target_long arg2) { tcg_out_st32(s, COND_AL, arg, arg1, arg2); } -static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val) -{ - if (val > 0) - if (val < 0x100) - tcg_out_dat_imm(s, COND_AL, ARITH_ADD, reg, reg, val); - else - tcg_abort(); - else if (val < 0) { - if (val > -0x100) - tcg_out_dat_imm(s, COND_AL, ARITH_SUB, reg, reg, -val); - else - tcg_abort(); - } -} - -static inline void tcg_out_mov(TCGContext *s, TCGType type, int ret, int arg) +static inline void tcg_out_mov(TCGContext *s, TCGType type, + TCGReg ret, TCGReg arg) { tcg_out_dat_reg(s, COND_AL, ARITH_MOV, ret, 0, arg, SHIFT_IMM_LSL(0)); } static inline void tcg_out_movi(TCGContext *s, TCGType type, - int ret, tcg_target_long arg) + TCGReg ret, tcg_target_long arg) { tcg_out_movi32(s, COND_AL, ret, arg); } static void tcg_target_qemu_prologue(TCGContext *s) { - /* There is no need to save r7, it is used to store the address - of the env structure and is not modified by GCC. */ + /* Calling convention requires us to save r4-r11 and lr; + * save also r12 to maintain stack 8-alignment. + */ + + /* stmdb sp!, { r4 - r12, lr } */ + tcg_out32(s, (COND_AL << 28) | 0x092d5ff0); - /* stmdb sp!, { r4 - r6, r8 - r11, lr } */ - tcg_out32(s, (COND_AL << 28) | 0x092d4f70); + tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); - tcg_out_bx(s, COND_AL, TCG_REG_R0); + tcg_out_bx(s, COND_AL, tcg_target_call_iarg_regs[1]); tb_ret_addr = s->code_ptr; - /* ldmia sp!, { r4 - r6, r8 - r11, pc } */ - tcg_out32(s, (COND_AL << 28) | 0x08bd8f70); + /* ldmia sp!, { r4 - r12, pc } */ + tcg_out32(s, (COND_AL << 28) | 0x08bd9ff0); }