--- hatari/src/debug/profilecpu.c 2019/04/09 08:55:35 1.1.1.3 +++ hatari/src/debug/profilecpu.c 2019/04/09 08:58:04 1.1.1.5 @@ -20,10 +20,11 @@ const char Profilecpu_fileid[] = "Hatari #include "dsp.h" #include "m68000.h" #include "68kDisass.h" +#include "symbols.h" #include "profile.h" #include "profile_priv.h" +#include "debug_priv.h" #include "stMemory.h" -#include "symbols.h" #include "tos.h" #include "screen.h" #include "video.h" @@ -52,17 +53,27 @@ const char Profilecpu_fileid[] = "Hatari static bool skip_assert; #endif +/* whether to track & show all cache stats for all instructions */ +#define DEBUG_CACHE 0 + + static callinfo_t cpu_callinfo; #define MAX_CPU_PROFILE_VALUE 0xFFFFFFFF typedef struct { - Uint32 count; /* how many times this address instrcution is executed */ + Uint32 count; /* how many times this address instruction is executed */ Uint32 cycles; /* how many CPU cycles was taken at this address */ - Uint32 i_misses; /* how many CPU instruction cache misses happened at this address */ - Uint32 d_hits; /* how many CPU data cache hits happened at this address */ +#if DEBUG_CACHE /* track also less relevant cache events */ + Uint32 i_hits; /* how many CPU i-cache hits happened at this address */ + Uint32 d_misses; /* how many CPU d-cache misses happened at this address */ +#endif + Uint32 i_misses; /* how many CPU i-cache misses happened at this address */ + Uint32 d_hits; /* how many CPU d-cache hits happened at this address */ } cpu_profile_item_t; + +/* max count of hits/misses single instruction can trigger at once */ #define MAX_I_HITS 8 #define MAX_I_MISSES 8 #define MAX_D_HITS 32 @@ -86,6 +97,7 @@ static struct { Uint32 loop_count; /* how many times it was looped */ Uint32 disasm_addr; /* 'addresses' command start address */ #if ENABLE_WINUAE_CPU + Uint32 i_prefetched; /* instructions that don't incur prefetch hit/miss */ Uint32 i_hit_counts[MAX_I_HITS]; /* I-cache hit counts */ Uint32 d_hit_counts[MAX_D_HITS]; /* D-cache hit counts */ Uint32 i_miss_counts[MAX_I_MISSES]; /* I-cache miss counts */ @@ -131,7 +143,7 @@ static inline Uint32 address2index(Uint3 pc += TosSize; } #if ENABLE_WINUAE_CPU - } else if (TTmemory && pc >= TTRAM_START && pc < TTRAM_START + 1024*1024*(unsigned)ConfigureParams.Memory.nTTRamSize) { + } else if (TTmemory && pc >= TTRAM_START && pc < TTRAM_START + 1024*(unsigned)ConfigureParams.Memory.TTRamSize_KB) { pc += STRamEnd + TosSize + CART_SIZE - TTRAM_START; #endif } else { @@ -188,26 +200,40 @@ static Uint32 index2address(Uint32 idx) /* ------------------ CPU profile results ----------------- */ /** - * Get CPU cycles, count and count percentage for given address. + * Write string containing CPU cache stats, cycles, count, count percentage + * for given address to provided buffer. + * * Return true if data was available and non-zero, false otherwise. */ -bool Profile_CpuAddressData(Uint32 addr, float *percentage, Uint32 *count, Uint32 *cycles, Uint32 *i_misses, Uint32 *d_hits) +bool Profile_CpuAddressDataStr(char *buffer, size_t maxlen, Uint32 addr) { + cpu_profile_item_t *item; + float percentage; Uint32 idx; + + assert(buffer && maxlen); if (!cpu_profile.data) { return false; } idx = address2index(addr); - *i_misses = cpu_profile.data[idx].i_misses; - *d_hits = cpu_profile.data[idx].d_hits; - *cycles = cpu_profile.data[idx].cycles; - *count = cpu_profile.data[idx].count; + item = &(cpu_profile.data[idx]); + if (cpu_profile.all.count) { - *percentage = 100.0*(*count)/cpu_profile.all.count; + percentage = 100.0 * item->count / cpu_profile.all.count; } else { - *percentage = 0.0; + percentage = 0.0; } - return (*count > 0); +#if DEBUG_CACHE + snprintf(buffer, maxlen, "%5.2f%% (%u, %u, %u, %u, %u, %u)", + percentage, item->count, item->cycles, + item->i_hits, item->i_misses, + item->d_hits, item->d_misses); +#else + snprintf(buffer, maxlen, "%5.2f%% (%u, %u, %u, %u)", + percentage, item->count, item->cycles, + item->i_misses, item->d_hits); +#endif + return (item->count > 0); } /** @@ -222,27 +248,27 @@ static void show_cpu_area_stats(profile_ fprintf(stderr, "- active address range:\n 0x%06x-0x%06x\n", index2address(area->lowest), index2address(area->highest)); - fprintf(stderr, "- active instruction addresses:\n %d (%.2f%% of all)\n", + fprintf(stderr, "- active instruction addresses:\n %d (%.2f%% of all areas)\n", area->active, 100.0 * area->active / cpu_profile.active); - fprintf(stderr, "- executed instructions:\n %"PRIu64" (%.2f%% of all)\n", + fprintf(stderr, "- executed instructions:\n %"PRIu64" (%.2f%% of all areas)\n", area->counters.count, 100.0 * area->counters.count / cpu_profile.all.count); /* CPU cache in use? */ if (cpu_profile.all.i_misses) { - fprintf(stderr, "- instruction cache misses:\n %"PRIu64" (%.2f%% of all)\n", + fprintf(stderr, "- instruction cache misses:\n %"PRIu64" (%.2f%% of all areas)\n", area->counters.i_misses, 100.0 * area->counters.i_misses / cpu_profile.all.i_misses); } if (cpu_profile.all.d_hits) { - fprintf(stderr, "- data cache hits:\n %"PRIu64" (%.2f%% of all)\n", + fprintf(stderr, "- data cache hits:\n %"PRIu64" (%.2f%% of all areas)\n", area->counters.d_hits, 100.0 * area->counters.d_hits / cpu_profile.all.d_hits); } - fprintf(stderr, "- used cycles:\n %"PRIu64" (%.2f%% of all)\n = %.5fs\n", + fprintf(stderr, "- used cycles:\n %"PRIu64" (%.2f%% of all areas)\n = %.5fs\n", area->counters.cycles, 100.0 * area->counters.cycles / cpu_profile.all.cycles, - (double)area->counters.cycles / MachineClocks.CPU_Freq); + (double)area->counters.cycles / MachineClocks.CPU_Freq_Emul); if (area->overflow) { fprintf(stderr, " *** COUNTER OVERFLOW! ***\n"); } @@ -263,13 +289,13 @@ void Profile_CpuShowStats(void) fprintf(stderr, "Cartridge ROM (0x%X-%X):\n", CART_START, CART_END); show_cpu_area_stats(&cpu_profile.rom); - if (TTmemory && ConfigureParams.Memory.nTTRamSize) { - fprintf(stderr, "TT-RAM (0x%X-%X):\n", TTRAM_START, TTRAM_START + 1024*1024*ConfigureParams.Memory.nTTRamSize); + if (TTmemory && ConfigureParams.Memory.TTRamSize_KB) { + fprintf(stderr, "TT-RAM (0x%X-%X):\n", TTRAM_START, TTRAM_START + 1024*ConfigureParams.Memory.TTRamSize_KB); show_cpu_area_stats(&cpu_profile.ttram); } fprintf(stderr, "\n= %.5fs\n", - (double)cpu_profile.all.cycles / MachineClocks.CPU_Freq); + (double)cpu_profile.all.cycles / MachineClocks.CPU_Freq_Emul); } #if ENABLE_WINUAE_CPU @@ -278,15 +304,11 @@ void Profile_CpuShowStats(void) */ static void show_histogram(const char *title, int count, Uint32 *items) { - Uint64 maxval; + const Uint64 maxval = cpu_profile.all.count; Uint32 value; int i; - fprintf(stderr, "\n%s, number of occurrencies:\n", title); - maxval = 0; - for (i = 0; i < count; i++) { - maxval += items[i]; - } + fprintf(stderr, "\n%s, number of occurrences:\n", title); for (i = 0; i < count; i++) { value = items[i]; if (value) { @@ -306,17 +328,25 @@ static void show_histogram(const char *t void Profile_CpuShowCaches(void) { if (!(cpu_profile.all.i_misses || cpu_profile.all.d_hits)) { - fprintf(stderr, "No instruction/data cache information."); + fprintf(stderr, "No instruction/data cache information.\n"); return; } + fprintf(stderr, + "\nNote:\n" + "- these statistics include all profiled instructions, but\n" + "- instruction cache events happen only on prefetch/branch\n" + "- data cache events can happen only for instructions that do memory reads\n" + "\nAlready prefetched instructions: %.3f%% (no hits/misses)\n", + 100.0 * cpu_profile.i_prefetched / cpu_profile.all.count); + show_histogram("Instruction cache hits per instruction", - ARRAYSIZE(cpu_profile.i_hit_counts), cpu_profile.i_hit_counts); + ARRAY_SIZE(cpu_profile.i_hit_counts), cpu_profile.i_hit_counts); show_histogram("Instruction cache misses per instruction", - ARRAYSIZE(cpu_profile.i_miss_counts), cpu_profile.i_miss_counts); + ARRAY_SIZE(cpu_profile.i_miss_counts), cpu_profile.i_miss_counts); show_histogram("Data cache hits per instruction", - ARRAYSIZE(cpu_profile.d_hit_counts), cpu_profile.d_hit_counts); + ARRAY_SIZE(cpu_profile.d_hit_counts), cpu_profile.d_hit_counts); show_histogram("Data cache misses per instruction", - ARRAYSIZE(cpu_profile.d_miss_counts), cpu_profile.d_miss_counts); + ARRAY_SIZE(cpu_profile.d_miss_counts), cpu_profile.d_miss_counts); } #else void Profile_CpuShowCaches(void) { @@ -328,10 +358,10 @@ void Profile_CpuShowCaches(void) { * Show CPU instructions which execution was profiled, in the address order, * starting from the given address. Return next disassembly address. */ -Uint32 Profile_CpuShowAddresses(Uint32 lower, Uint32 upper, FILE *out) +Uint32 Profile_CpuShowAddresses(Uint32 lower, Uint32 upper, FILE *out, paging_t use_paging) { int oldcols[DISASM_COLUMNS], newcols[DISASM_COLUMNS]; - int show, shown, active; + int show, shown, addrs, active; const char *symbol; cpu_profile_item_t *data; Uint32 idx, end, size; @@ -353,11 +383,14 @@ Uint32 Profile_CpuShowAddresses(Uint32 l } } else { end = size; - show = ConfigureParams.Debugger.nDisasmLines; + show = DebugUI_GetPageLines(ConfigureParams.Debugger.nDisasmLines, 0); if (!show || show > active) { show = active; } } + if (use_paging == PAGING_DISABLED) { + show = INT_MAX; + } /* get/change columns */ Disasm_GetColumns(oldcols); @@ -365,26 +398,30 @@ Uint32 Profile_CpuShowAddresses(Uint32 l Disasm_SetColumns(newcols); fputs("# disassembly with profile data: % (, , , )\n", out); + shown = 2; /* first and last printf */ - nextpc = 0; + addrs = nextpc = 0; idx = address2index(lower); - for (shown = 0; shown < show && idx < end; idx++) { + for (; shown < show && idx < end; idx++) { if (!data[idx].count) { continue; } addr = index2address(idx); if (addr != nextpc && nextpc) { fprintf(out, "[...]\n"); + shown++; } - symbol = Symbols_GetByCpuAddress(addr); + symbol = Symbols_GetByCpuAddress(addr, SYMTYPE_TEXT); if (symbol) { fprintf(out, "%s:\n", symbol); + shown++; } /* NOTE: column setup works only with 68kDisass disasm engine! */ Disasm(out, addr, &nextpc, 1); shown++; + addrs++; } - printf("Disassembled %d (of active %d) CPU addresses.\n", shown, active); + printf("Disassembled %d (of active %d) CPU addresses.\n", addrs, active); /* restore disassembly columns */ Disasm_SetColumns(oldcols); @@ -642,7 +679,7 @@ void Profile_CpuShowCounts(int show, boo return; } - symbols = Symbols_CpuCount(); + symbols = Symbols_CpuCodeCount(); if (!symbols) { fprintf(stderr, "ERROR: no CPU symbols loaded!\n"); return; @@ -655,7 +692,7 @@ void Profile_CpuShowCounts(int show, boo for (end = sort_arr + active; sort_arr < end; sort_arr++) { addr = index2address(*sort_arr); - name = Symbols_GetByCpuAddress(addr); + name = Symbols_GetByCpuAddress(addr, SYMTYPE_TEXT); if (!name) { continue; } @@ -681,7 +718,7 @@ static const char * addr2name(Uint32 add { Uint32 idx = address2index(addr); *total = cpu_profile.data[idx].count; - return Symbols_GetByCpuAddress(addr); + return Symbols_GetByCpuAddress(addr, SYMTYPE_TEXT); } /** @@ -713,13 +750,13 @@ void Profile_CpuSave(FILE *out) if (text && (text < TosAddress || text >= TTRAM_START)) { fprintf(out, "PROGRAM_TEXT:\t0x%06x-0x%06x\n", text, DebugInfo_GetTEXTEnd()); } - if (TTmemory && ConfigureParams.Memory.nTTRamSize) { - end = TTRAM_START + 1024*1024*ConfigureParams.Memory.nTTRamSize; + if (TTmemory && ConfigureParams.Memory.TTRamSize_KB) { + end = TTRAM_START + 1024*ConfigureParams.Memory.TTRamSize_KB; fprintf(out, "TT_RAM:\t\t0x%08x-0x%08x\n", TTRAM_START, end); } else if (end < CART_END) { end = CART_END; } - Profile_CpuShowAddresses(0, end-2, out); + Profile_CpuShowAddresses(0, end-2, out, PAGING_DISABLED); Profile_CpuShowCallers(out); } @@ -749,8 +786,8 @@ bool Profile_CpuStart(void) /* Shouldn't change within same debug session */ size = (STRamEnd + CART_SIZE + TosSize) / 2; - if (TTmemory && ConfigureParams.Memory.nTTRamSize) { - size += ConfigureParams.Memory.nTTRamSize * 1024*1024/2; + if (TTmemory && ConfigureParams.Memory.TTRamSize_KB) { + size += ConfigureParams.Memory.TTRamSize_KB * 1024/2; } /* Add one entry for catching invalid PC values */ @@ -763,7 +800,7 @@ bool Profile_CpuStart(void) (int)sizeof(*cpu_profile.data)*size/(1024*1024)); cpu_profile.size = size; - Profile_AllocCallinfo(&(cpu_callinfo), Symbols_CpuCount(), "CPU"); + Profile_AllocCallinfo(&(cpu_callinfo), Symbols_CpuCodeCount(), "CPU"); /* special hack for EmuTOS */ etos_switcher = PC_UNDEFINED; @@ -895,7 +932,7 @@ static void collect_calls(Uint32 pc, cou } /* address is one which we're tracking? */ - idx = Symbols_GetCpuAddressIndex(pc); + idx = Symbols_GetCpuCodeIndex(pc); if (unlikely(idx >= 0)) { flag = cpu_opcode_type(family, prev_pc, pc); @@ -1046,6 +1083,18 @@ void Profile_CpuUpdate(void) CpuInstruction.D_Cache_miss = 0; /* tracked for every address */ +# if DEBUG_CACHE + if (likely(prev->i_hits < MAX_CPU_PROFILE_VALUE - i_hits)) { + prev->i_hits += i_hits; + } else { + prev->i_hits = MAX_CPU_PROFILE_VALUE; + } + if (likely(prev->d_misses < MAX_CPU_PROFILE_VALUE - d_misses)) { + prev->d_misses += d_misses; + } else { + prev->d_misses = MAX_CPU_PROFILE_VALUE; + } +# endif if (likely(prev->i_misses < MAX_CPU_PROFILE_VALUE - i_misses)) { prev->i_misses += i_misses; } else { @@ -1058,6 +1107,9 @@ void Profile_CpuUpdate(void) } /* tracking for histogram, check for array overflows */ + if (!(i_hits || i_misses)) { + cpu_profile.i_prefetched++; + } if (unlikely(i_hits >= MAX_I_HITS)) { i_hits = warn_too_large("number of CPU instruction cache hits", i_hits, MAX_I_HITS, prev_pc, pc); } @@ -1077,12 +1129,12 @@ void Profile_CpuUpdate(void) d_misses = warn_too_large("number of CPU data cache misses", d_misses, MAX_D_MISSES, prev_pc, pc); } cpu_profile.d_miss_counts[d_misses]++; -#endif +#endif /* ENABLE_WINUAE_CPU */ if (cpu_callinfo.sites) { collect_calls(prev_pc, counters); } - /* counters are increased after caller info is processed, + /* total counters are increased after caller info is processed, * otherwise cost for the instruction calling the callee * doesn't get accounted to caller (but callee). */ @@ -1201,8 +1253,8 @@ void Profile_CpuStop(void) /* user didn't change RAM or TOS size in the meanwhile? */ size = stsize = (STRamEnd + CART_SIZE + TosSize) / 2; - if (TTmemory && ConfigureParams.Memory.nTTRamSize) { - size += ConfigureParams.Memory.nTTRamSize * 1024*1024/2; + if (TTmemory && ConfigureParams.Memory.TTRamSize_KB) { + size += ConfigureParams.Memory.TTRamSize_KB * 1024/2; } assert(cpu_profile.size == size); @@ -1280,7 +1332,7 @@ void Profile_CpuGetPointers(bool **enabl /** * Get callinfo & symbol search pointers for stack walking. */ -void Profile_CpuGetCallinfo(callinfo_t **callinfo, const char* (**get_symbol)(Uint32)) +void Profile_CpuGetCallinfo(callinfo_t **callinfo, const char* (**get_symbol)(Uint32, symtype_t)) { *callinfo = &(cpu_callinfo); *get_symbol = Symbols_GetByCpuAddress;