--- qemu/tcg/arm/tcg-target.c 2018/04/24 19:18:23 1.1.1.8 +++ qemu/tcg/arm/tcg-target.c 2018/04/24 19:35:48 1.1.1.9 @@ -842,6 +842,12 @@ static inline void tcg_out_st8(TCGContex tcg_out_st8_12(s, cond, rd, rn, offset); } +/* The _goto case is normally between TBs within the same code buffer, + * and with the code buffer limited to 16MB we shouldn't need the long + * case. + * + * .... except to the prologue that is in its own buffer. + */ static inline void tcg_out_goto(TCGContext *s, int cond, uint32_t addr) { int32_t val; @@ -855,22 +861,20 @@ static inline void tcg_out_goto(TCGConte if (val - 8 < 0x01fffffd && val - 8 > -0x01fffffd) tcg_out_b(s, cond, val); else { -#if 1 - tcg_abort(); -#else if (cond == COND_AL) { tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4); - tcg_out32(s, addr); /* XXX: This is l->u.value, can we use it? */ + tcg_out32(s, addr); } else { tcg_out_movi32(s, cond, TCG_REG_R8, val - 8); tcg_out_dat_reg(s, cond, ARITH_ADD, TCG_REG_PC, TCG_REG_PC, TCG_REG_R8, SHIFT_IMM_LSL(0)); } -#endif } } +/* The call case is mostly used for helpers - so it's not unreasonable + * for them to be beyond branch range */ static inline void tcg_out_call(TCGContext *s, uint32_t addr) { int32_t val; @@ -887,20 +891,9 @@ static inline void tcg_out_call(TCGConte tcg_out_bl(s, COND_AL, val); } } else { -#if 1 - tcg_abort(); -#else - if (cond == COND_AL) { - tcg_out_dat_imm(s, cond, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 4); - tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4); - tcg_out32(s, addr); /* XXX: This is l->u.value, can we use it? */ - } else { - tcg_out_movi32(s, cond, TCG_REG_R9, addr); - tcg_out_dat_reg(s, cond, ARITH_MOV, TCG_REG_R14, 0, - TCG_REG_PC, SHIFT_IMM_LSL(0)); - tcg_out_bx(s, cond, TCG_REG_R9); - } -#endif + tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 4); + tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4); + tcg_out32(s, addr); } } @@ -936,6 +929,27 @@ static inline void tcg_out_goto_label(TC #include "../../softmmu_defs.h" +#ifdef CONFIG_TCG_PASS_AREG0 +/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, + int mmu_idx) */ +static const void * const qemu_ld_helpers[4] = { + helper_ldb_mmu, + helper_ldw_mmu, + helper_ldl_mmu, + helper_ldq_mmu, +}; + +/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr, + uintxx_t val, int mmu_idx) */ +static const void * const qemu_st_helpers[4] = { + helper_stb_mmu, + helper_stw_mmu, + helper_stl_mmu, + helper_stq_mmu, +}; +#else +/* legacy helper signature: __ld_mmu(target_ulong addr, int + mmu_idx) */ static void *qemu_ld_helpers[4] = { __ldb_mmu, __ldw_mmu, @@ -943,6 +957,8 @@ static void *qemu_ld_helpers[4] = { __ldq_mmu, }; +/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val, + int mmu_idx) */ static void *qemu_st_helpers[4] = { __stb_mmu, __stw_mmu, @@ -950,6 +966,7 @@ static void *qemu_st_helpers[4] = { __stq_mmu, }; #endif +#endif #define TLB_SHIFT (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS) @@ -997,10 +1014,10 @@ static inline void tcg_out_qemu_ld(TCGCo tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_AREG0, TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS)); /* In the - * ldr r1 [r0, #(offsetof(CPUState, tlb_table[mem_index][0].addr_read))] + * ldr r1 [r0, #(offsetof(CPUArchState, tlb_table[mem_index][0].addr_read))] * below, the offset is likely to exceed 12 bits if mem_index != 0 and * not exceed otherwise, so use an - * add r0, r0, #(mem_index * sizeof *CPUState.tlb_table) + * add r0, r0, #(mem_index * sizeof *CPUArchState.tlb_table) * before. */ if (mem_index) @@ -1008,7 +1025,7 @@ static inline void tcg_out_qemu_ld(TCGCo (mem_index << (TLB_SHIFT & 1)) | ((16 - (TLB_SHIFT >> 1)) << 8)); tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R0, - offsetof(CPUState, tlb_table[0][0].addr_read)); + offsetof(CPUArchState, tlb_table[0][0].addr_read)); tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R1, TCG_REG_R8, SHIFT_IMM_LSL(TARGET_PAGE_BITS)); /* Check alignment. */ @@ -1019,12 +1036,12 @@ static inline void tcg_out_qemu_ld(TCGCo /* XXX: possibly we could use a block data load or writeback in * the first access. */ tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0, - offsetof(CPUState, tlb_table[0][0].addr_read) + 4); + offsetof(CPUArchState, tlb_table[0][0].addr_read) + 4); tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R1, addr_reg2, SHIFT_IMM_LSL(0)); # endif tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0, - offsetof(CPUState, tlb_table[0][0].addend)); + offsetof(CPUArchState, tlb_table[0][0].addend)); switch (opc) { case 0: @@ -1082,6 +1099,19 @@ static inline void tcg_out_qemu_ld(TCGCo TCG_REG_R1, 0, addr_reg2, SHIFT_IMM_LSL(0)); tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R2, 0, mem_index); # endif +#ifdef CONFIG_TCG_PASS_AREG0 + /* XXX/FIXME: suboptimal and incorrect for 64 bit */ + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, + tcg_target_call_iarg_regs[2], 0, + tcg_target_call_iarg_regs[1], SHIFT_IMM_LSL(0)); + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, + tcg_target_call_iarg_regs[1], 0, + tcg_target_call_iarg_regs[0], SHIFT_IMM_LSL(0)); + + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, + tcg_target_call_iarg_regs[0], 0, TCG_AREG0, + SHIFT_IMM_LSL(0)); +#endif tcg_out_call(s, (tcg_target_long) qemu_ld_helpers[s_bits]); switch (opc) { @@ -1217,10 +1247,10 @@ static inline void tcg_out_qemu_st(TCGCo tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_AREG0, TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS)); /* In the - * ldr r1 [r0, #(offsetof(CPUState, tlb_table[mem_index][0].addr_write))] + * ldr r1 [r0, #(offsetof(CPUArchState, tlb_table[mem_index][0].addr_write))] * below, the offset is likely to exceed 12 bits if mem_index != 0 and * not exceed otherwise, so use an - * add r0, r0, #(mem_index * sizeof *CPUState.tlb_table) + * add r0, r0, #(mem_index * sizeof *CPUArchState.tlb_table) * before. */ if (mem_index) @@ -1228,7 +1258,7 @@ static inline void tcg_out_qemu_st(TCGCo (mem_index << (TLB_SHIFT & 1)) | ((16 - (TLB_SHIFT >> 1)) << 8)); tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R0, - offsetof(CPUState, tlb_table[0][0].addr_write)); + offsetof(CPUArchState, tlb_table[0][0].addr_write)); tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R1, TCG_REG_R8, SHIFT_IMM_LSL(TARGET_PAGE_BITS)); /* Check alignment. */ @@ -1239,12 +1269,12 @@ static inline void tcg_out_qemu_st(TCGCo /* XXX: possibly we could use a block data load or writeback in * the first access. */ tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0, - offsetof(CPUState, tlb_table[0][0].addr_write) + 4); + offsetof(CPUArchState, tlb_table[0][0].addr_write) + 4); tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R1, addr_reg2, SHIFT_IMM_LSL(0)); # endif tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0, - offsetof(CPUState, tlb_table[0][0].addend)); + offsetof(CPUArchState, tlb_table[0][0].addend)); switch (opc) { case 0: @@ -1348,6 +1378,22 @@ static inline void tcg_out_qemu_st(TCGCo } # endif +#ifdef CONFIG_TCG_PASS_AREG0 + /* XXX/FIXME: suboptimal and incorrect for 64 bit */ + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, + tcg_target_call_iarg_regs[3], 0, + tcg_target_call_iarg_regs[2], SHIFT_IMM_LSL(0)); + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, + tcg_target_call_iarg_regs[2], 0, + tcg_target_call_iarg_regs[1], SHIFT_IMM_LSL(0)); + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, + tcg_target_call_iarg_regs[1], 0, + tcg_target_call_iarg_regs[0], SHIFT_IMM_LSL(0)); + + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, + tcg_target_call_iarg_regs[0], 0, TCG_AREG0, + SHIFT_IMM_LSL(0)); +#endif tcg_out_call(s, (tcg_target_long) qemu_st_helpers[s_bits]); if (opc == 3) tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R13, TCG_REG_R13, 0x10); @@ -1804,7 +1850,7 @@ static void tcg_target_init(TCGContext * tcg_regset_set_reg(s->reserved_regs, TCG_REG_PC); tcg_add_target_add_op_defs(arm_op_defs); - tcg_set_frame(s, TCG_AREG0, offsetof(CPUState, temp_buf), + tcg_set_frame(s, TCG_AREG0, offsetof(CPUArchState, temp_buf), CPU_TEMP_BUF_NLONGS * sizeof(long)); }