|
|
1.1 ! root 1: /* ! 2: * Hatari - profilecpu.c ! 3: * ! 4: * Copyright (C) 2010-2013 by Eero Tamminen ! 5: * ! 6: * This file is distributed under the GNU General Public License, version 2 ! 7: * or at your option any later version. Read the file gpl.txt for details. ! 8: * ! 9: * profilecpu.c - functions for profiling CPU and showing the results. ! 10: */ ! 11: const char Profilecpu_fileid[] = "Hatari profilecpu.c : " __DATE__ " " __TIME__; ! 12: ! 13: #include <stdio.h> ! 14: #include <inttypes.h> ! 15: #include <assert.h> ! 16: #include "main.h" ! 17: #include "configuration.h" ! 18: #include "clocks_timings.h" ! 19: #include "debugInfo.h" ! 20: #include "dsp.h" ! 21: #include "m68000.h" ! 22: #include "68kDisass.h" ! 23: #include "profile.h" ! 24: #include "profile_priv.h" ! 25: #include "stMemory.h" ! 26: #include "symbols.h" ! 27: #include "tos.h" ! 28: ! 29: /* if non-zero, output (more) warnings on suspicious: ! 30: * - cycle/instruction counts ! 31: * - PC switches ! 32: * And drop to debugger on invalid PC addresses. ! 33: */ ! 34: #define DEBUG 0 ! 35: #if DEBUG ! 36: #include "debugui.h" ! 37: static bool skip_assert; ! 38: #endif ! 39: ! 40: static callinfo_t cpu_callinfo; ! 41: ! 42: /* This is relevant with WinUAE CPU core: ! 43: * - the default cycle exact variant needs this define to be non-zero ! 44: * - non-cycle exact and MMU variants need this define to be 0 ! 45: * for cycle counts to make any sense ! 46: */ ! 47: #define USE_CYCLES_COUNTER 1 ! 48: ! 49: #define MAX_CPU_PROFILE_VALUE 0xFFFFFFFF ! 50: ! 51: typedef struct { ! 52: Uint32 count; /* how many times this address instrcution is executed */ ! 53: Uint32 cycles; /* how many CPU cycles was taken at this address */ ! 54: Uint32 misses; /* how many CPU cache misses happened at this address */ ! 55: } cpu_profile_item_t; ! 56: ! 57: #define MAX_MISS 4 ! 58: ! 59: static struct { ! 60: counters_t all; /* total counts for all areas */ ! 61: Uint32 miss_counts[MAX_MISS]; /* cache miss counts */ ! 62: cpu_profile_item_t *data; /* profile data items */ ! 63: Uint32 size; /* number of allocated profile data items */ ! 64: profile_area_t ram; /* normal RAM stats */ ! 65: profile_area_t rom; /* cartridge ROM stats */ ! 66: profile_area_t tos; /* ROM TOS stats */ ! 67: int active; /* number of active data items in all areas */ ! 68: Uint32 *sort_arr; /* data indexes used for sorting */ ! 69: Uint32 prev_cycles; /* previous instruction cycles counter */ ! 70: Uint32 prev_pc; /* previous instruction address */ ! 71: int prev_family; /* previous instruction opcode family */ ! 72: Uint32 disasm_addr; /* 'addresses' command start address */ ! 73: bool processed; /* true when data is already processed */ ! 74: bool enabled; /* true when profiling enabled */ ! 75: } cpu_profile; ! 76: ! 77: /* special hack for EmuTOS */ ! 78: static Uint32 etos_switcher; ! 79: ! 80: ! 81: /* ------------------ CPU profile address mapping ----------------- */ ! 82: ! 83: /** ! 84: * convert Atari memory address to sorting array profile data index. ! 85: */ ! 86: static inline Uint32 address2index(Uint32 pc) ! 87: { ! 88: if (unlikely(pc & 1)) { ! 89: fprintf(stderr, "WARNING: odd CPU profile instruction address 0x%x!\n", pc); ! 90: #if DEBUG ! 91: skip_assert = true; ! 92: DebugUI(REASON_CPU_EXCEPTION); ! 93: #endif ! 94: } ! 95: if (pc >= TosAddress && pc < TosAddress + TosSize) { ! 96: /* TOS, put it after RAM data */ ! 97: pc = pc - TosAddress + STRamEnd; ! 98: ! 99: } else if (pc >= 0xFA0000 && pc < 0xFC0000) { ! 100: /* ROM, put it after RAM & TOS data */ ! 101: pc = pc - 0xFA0000 + STRamEnd + TosSize; ! 102: ! 103: } else { ! 104: /* if in RAM, use as-is */ ! 105: if (unlikely(pc >= STRamEnd)) { ! 106: fprintf(stderr, "WARNING: 'invalid' CPU PC profile instruction address 0x%x!\n", pc); ! 107: /* extra entry at end is reserved for invalid PC values */ ! 108: pc = STRamEnd + TosSize + 0x20000; ! 109: #if DEBUG ! 110: skip_assert = true; ! 111: DebugUI(REASON_CPU_EXCEPTION); ! 112: #endif ! 113: } ! 114: } ! 115: /* CPU instructions are at even addresses, save space by halving */ ! 116: return (pc >> 1); ! 117: } ! 118: ! 119: /** ! 120: * convert sorting array profile data index to Atari memory address. ! 121: */ ! 122: static Uint32 index2address(Uint32 idx) ! 123: { ! 124: idx <<= 1; ! 125: /* RAM */ ! 126: if (idx < STRamEnd) { ! 127: return idx; ! 128: } ! 129: /* TOS */ ! 130: idx -= STRamEnd; ! 131: if (idx < TosSize) { ! 132: return idx + TosAddress; ! 133: } ! 134: /* ROM */ ! 135: return idx - TosSize + 0xFA0000; ! 136: } ! 137: ! 138: /* ------------------ CPU profile results ----------------- */ ! 139: ! 140: /** ! 141: * Get CPU cycles, count and count percentage for given address. ! 142: * Return true if data was available and non-zero, false otherwise. ! 143: */ ! 144: bool Profile_CpuAddressData(Uint32 addr, float *percentage, Uint32 *count, Uint32 *cycles, Uint32 *misses) ! 145: { ! 146: Uint32 idx; ! 147: if (!cpu_profile.data) { ! 148: return false; ! 149: } ! 150: idx = address2index(addr); ! 151: *misses = cpu_profile.data[idx].misses; ! 152: *cycles = cpu_profile.data[idx].cycles; ! 153: *count = cpu_profile.data[idx].count; ! 154: if (cpu_profile.all.count) { ! 155: *percentage = 100.0*(*count)/cpu_profile.all.count; ! 156: } else { ! 157: *percentage = 0.0; ! 158: } ! 159: return (*count > 0); ! 160: } ! 161: ! 162: /** ! 163: * Helper to show statistics for specified CPU profile area. ! 164: */ ! 165: static void show_cpu_area_stats(profile_area_t *area) ! 166: { ! 167: if (!area->active) { ! 168: fprintf(stderr, "- no activity\n"); ! 169: return; ! 170: } ! 171: fprintf(stderr, "- active address range:\n 0x%06x-0x%06x\n", ! 172: index2address(area->lowest), ! 173: index2address(area->highest)); ! 174: fprintf(stderr, "- active instruction addresses:\n %d (%.2f%% of all)\n", ! 175: area->active, ! 176: 100.0 * area->active / cpu_profile.active); ! 177: fprintf(stderr, "- executed instructions:\n %"PRIu64" (%.2f%% of all)\n", ! 178: area->counters.count, ! 179: 100.0 * area->counters.count / cpu_profile.all.count); ! 180: #if ENABLE_WINUAE_CPU ! 181: if (cpu_profile.all.misses) { /* CPU cache in use? */ ! 182: fprintf(stderr, "- instruction cache misses:\n %"PRIu64" (%.2f%% of all)\n", ! 183: area->counters.misses, ! 184: 100.0 * area->counters.misses / cpu_profile.all.misses); ! 185: } ! 186: #endif ! 187: fprintf(stderr, "- used cycles:\n %"PRIu64" (%.2f%% of all)\n = %.5fs\n", ! 188: area->counters.cycles, ! 189: 100.0 * area->counters.cycles / cpu_profile.all.cycles, ! 190: (double)area->counters.cycles / MachineClocks.CPU_Freq); ! 191: if (area->overflow) { ! 192: fprintf(stderr, " *** COUNTER OVERFLOW! ***\n"); ! 193: } ! 194: } ! 195: ! 196: ! 197: /** ! 198: * show CPU area (RAM, ROM, TOS) specific statistics. ! 199: */ ! 200: void Profile_CpuShowStats(void) ! 201: { ! 202: fprintf(stderr, "Normal RAM (0-0x%X):\n", STRamEnd); ! 203: show_cpu_area_stats(&cpu_profile.ram); ! 204: ! 205: fprintf(stderr, "ROM TOS (0x%X-0x%X):\n", TosAddress, TosAddress + TosSize); ! 206: show_cpu_area_stats(&cpu_profile.tos); ! 207: ! 208: fprintf(stderr, "Cartridge ROM (0xFA0000-0xFC0000):\n"); ! 209: show_cpu_area_stats(&cpu_profile.rom); ! 210: ! 211: fprintf(stderr, "\n= %.5fs\n", ! 212: (double)cpu_profile.all.cycles / MachineClocks.CPU_Freq); ! 213: ! 214: #if ENABLE_WINUAE_CPU ! 215: if (cpu_profile.all.misses) { /* CPU cache in use? */ ! 216: int i; ! 217: fprintf(stderr, "\nCache misses per instruction, number of occurrences:\n"); ! 218: for (i = 0; i < MAX_MISS; i++) { ! 219: fprintf(stderr, "- %d: %d\n", i, cpu_profile.miss_counts[i]); ! 220: } ! 221: } ! 222: #endif ! 223: } ! 224: ! 225: /** ! 226: * Show CPU instructions which execution was profiled, in the address order, ! 227: * starting from the given address. Return next disassembly address. ! 228: */ ! 229: Uint32 Profile_CpuShowAddresses(Uint32 lower, Uint32 upper, FILE *out) ! 230: { ! 231: int oldcols[DISASM_COLUMNS], newcols[DISASM_COLUMNS]; ! 232: int show, shown, active; ! 233: const char *symbol; ! 234: cpu_profile_item_t *data; ! 235: Uint32 idx, end, size; ! 236: uaecptr nextpc, addr; ! 237: ! 238: data = cpu_profile.data; ! 239: if (!data) { ! 240: fprintf(stderr, "ERROR: no CPU profiling data available!\n"); ! 241: return 0; ! 242: } ! 243: ! 244: size = cpu_profile.size; ! 245: active = cpu_profile.active; ! 246: if (upper) { ! 247: end = address2index(upper); ! 248: show = active; ! 249: if (end > size) { ! 250: end = size; ! 251: } ! 252: } else { ! 253: end = size; ! 254: show = ConfigureParams.Debugger.nDisasmLines; ! 255: if (!show || show > active) { ! 256: show = active; ! 257: } ! 258: } ! 259: ! 260: /* get/change columns */ ! 261: Disasm_GetColumns(oldcols); ! 262: Disasm_DisableColumn(DISASM_COLUMN_HEXDUMP, oldcols, newcols); ! 263: Disasm_SetColumns(newcols); ! 264: ! 265: fputs("# disassembly with profile data: <instructions percentage>% (<sum of instructions>, <sum of cycles>, <sum of i-cache misses>)\n", out); ! 266: ! 267: nextpc = 0; ! 268: idx = address2index(lower); ! 269: for (shown = 0; shown < show && idx < end; idx++) { ! 270: if (!data[idx].count) { ! 271: continue; ! 272: } ! 273: addr = index2address(idx); ! 274: if (addr != nextpc && nextpc) { ! 275: fprintf(out, "[...]\n"); ! 276: } ! 277: symbol = Symbols_GetByCpuAddress(addr); ! 278: if (symbol) { ! 279: fprintf(out, "%s:\n", symbol); ! 280: } ! 281: /* NOTE: column setup works only with 68kDisass disasm engine! */ ! 282: Disasm(out, addr, &nextpc, 1); ! 283: shown++; ! 284: } ! 285: printf("Disassembled %d (of active %d) CPU addresses.\n", shown, active); ! 286: ! 287: /* restore disassembly columns */ ! 288: Disasm_SetColumns(oldcols); ! 289: return nextpc; ! 290: } ! 291: ! 292: /** ! 293: * remove all disassembly columns except instruction ones. ! 294: * data needed to restore columns is stored to "oldcols" ! 295: */ ! 296: static void leave_instruction_column(int *oldcols) ! 297: { ! 298: int i, newcols[DISASM_COLUMNS]; ! 299: ! 300: Disasm_GetColumns(oldcols); ! 301: for (i = 0; i < DISASM_COLUMNS; i++) { ! 302: if (i == DISASM_COLUMN_OPCODE || i == DISASM_COLUMN_OPERAND) { ! 303: continue; ! 304: } ! 305: Disasm_DisableColumn(i, oldcols, newcols); ! 306: oldcols = newcols; ! 307: } ! 308: Disasm_SetColumns(newcols); ! 309: } ! 310: ! 311: #if ENABLE_WINUAE_CPU ! 312: /** ! 313: * compare function for qsort() to sort CPU profile data by instruction cache misses. ! 314: */ ! 315: static int cmp_cpu_misses(const void *p1, const void *p2) ! 316: { ! 317: Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].misses; ! 318: Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].misses; ! 319: if (count1 > count2) { ! 320: return -1; ! 321: } ! 322: if (count1 < count2) { ! 323: return 1; ! 324: } ! 325: return 0; ! 326: } ! 327: ! 328: /** ! 329: * Sort CPU profile data addresses by instruction cache misses and show the results. ! 330: */ ! 331: void Profile_CpuShowMisses(int show) ! 332: { ! 333: int active; ! 334: int oldcols[DISASM_COLUMNS]; ! 335: Uint32 *sort_arr, *end, addr, nextpc; ! 336: cpu_profile_item_t *data = cpu_profile.data; ! 337: float percentage; ! 338: Uint32 count; ! 339: ! 340: if (!cpu_profile.all.misses) { ! 341: fprintf(stderr, "No CPU cache miss information available.\n"); ! 342: return; ! 343: } ! 344: ! 345: active = cpu_profile.active; ! 346: sort_arr = cpu_profile.sort_arr; ! 347: qsort(sort_arr, active, sizeof(*sort_arr), cmp_cpu_misses); ! 348: ! 349: leave_instruction_column(oldcols); ! 350: ! 351: printf("addr:\t\tmisses:\n"); ! 352: show = (show < active ? show : active); ! 353: for (end = sort_arr + show; sort_arr < end; sort_arr++) { ! 354: addr = index2address(*sort_arr); ! 355: count = data[*sort_arr].misses; ! 356: percentage = 100.0*count/cpu_profile.all.misses; ! 357: printf("0x%06x\t%5.2f%%\t%d%s\t", addr, percentage, count, ! 358: count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : ""); ! 359: Disasm(stdout, addr, &nextpc, 1); ! 360: } ! 361: printf("%d CPU addresses listed.\n", show); ! 362: ! 363: Disasm_SetColumns(oldcols); ! 364: } ! 365: #else ! 366: void Profile_CpuShowMisses(int show) { ! 367: fprintf(stderr, "Cache misses are recorded only with WinUAE CPU.\n"); ! 368: } ! 369: #endif ! 370: ! 371: ! 372: /** ! 373: * compare function for qsort() to sort CPU profile data by cycles counts. ! 374: */ ! 375: static int cmp_cpu_cycles(const void *p1, const void *p2) ! 376: { ! 377: Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].cycles; ! 378: Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].cycles; ! 379: if (count1 > count2) { ! 380: return -1; ! 381: } ! 382: if (count1 < count2) { ! 383: return 1; ! 384: } ! 385: return 0; ! 386: } ! 387: ! 388: /** ! 389: * Sort CPU profile data addresses by cycle counts and show the results. ! 390: */ ! 391: void Profile_CpuShowCycles(int show) ! 392: { ! 393: int active; ! 394: int oldcols[DISASM_COLUMNS]; ! 395: Uint32 *sort_arr, *end, addr, nextpc; ! 396: cpu_profile_item_t *data = cpu_profile.data; ! 397: float percentage; ! 398: Uint32 count; ! 399: ! 400: if (!data) { ! 401: fprintf(stderr, "ERROR: no CPU profiling data available!\n"); ! 402: return; ! 403: } ! 404: ! 405: active = cpu_profile.active; ! 406: sort_arr = cpu_profile.sort_arr; ! 407: qsort(sort_arr, active, sizeof(*sort_arr), cmp_cpu_cycles); ! 408: ! 409: leave_instruction_column(oldcols); ! 410: ! 411: printf("addr:\t\tcycles:\n"); ! 412: show = (show < active ? show : active); ! 413: for (end = sort_arr + show; sort_arr < end; sort_arr++) { ! 414: addr = index2address(*sort_arr); ! 415: count = data[*sort_arr].cycles; ! 416: percentage = 100.0*count/cpu_profile.all.cycles; ! 417: printf("0x%06x\t%5.2f%%\t%d%s\t", addr, percentage, count, ! 418: count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : ""); ! 419: Disasm(stdout, addr, &nextpc, 1); ! 420: } ! 421: printf("%d CPU addresses listed.\n", show); ! 422: ! 423: Disasm_SetColumns(oldcols); ! 424: } ! 425: ! 426: /** ! 427: * compare function for qsort() to sort CPU profile data by descending ! 428: * address access counts. ! 429: */ ! 430: static int cmp_cpu_count(const void *p1, const void *p2) ! 431: { ! 432: Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].count; ! 433: Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].count; ! 434: if (count1 > count2) { ! 435: return -1; ! 436: } ! 437: if (count1 < count2) { ! 438: return 1; ! 439: } ! 440: return 0; ! 441: } ! 442: ! 443: /** ! 444: * Sort CPU profile data addresses by call counts and show the results. ! 445: * If symbols are requested and symbols are loaded, show (only) addresses ! 446: * matching a symbol. ! 447: */ ! 448: void Profile_CpuShowCounts(int show, bool only_symbols) ! 449: { ! 450: cpu_profile_item_t *data = cpu_profile.data; ! 451: int symbols, matched, active; ! 452: int oldcols[DISASM_COLUMNS]; ! 453: Uint32 *sort_arr, *end, addr, nextpc; ! 454: const char *name; ! 455: float percentage; ! 456: Uint32 count; ! 457: ! 458: if (!data) { ! 459: fprintf(stderr, "ERROR: no CPU profiling data available!\n"); ! 460: return; ! 461: } ! 462: active = cpu_profile.active; ! 463: show = (show < active ? show : active); ! 464: ! 465: sort_arr = cpu_profile.sort_arr; ! 466: qsort(sort_arr, active, sizeof(*sort_arr), cmp_cpu_count); ! 467: ! 468: if (!only_symbols) { ! 469: leave_instruction_column(oldcols); ! 470: printf("addr:\t\tcount:\n"); ! 471: for (end = sort_arr + show; sort_arr < end; sort_arr++) { ! 472: addr = index2address(*sort_arr); ! 473: count = data[*sort_arr].count; ! 474: percentage = 100.0*count/cpu_profile.all.count; ! 475: printf("0x%06x\t%5.2f%%\t%d%s\t", ! 476: addr, percentage, count, ! 477: count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : ""); ! 478: Disasm(stdout, addr, &nextpc, 1); ! 479: } ! 480: printf("%d CPU addresses listed.\n", show); ! 481: Disasm_SetColumns(oldcols); ! 482: return; ! 483: } ! 484: ! 485: symbols = Symbols_CpuCount(); ! 486: if (!symbols) { ! 487: fprintf(stderr, "ERROR: no CPU symbols loaded!\n"); ! 488: return; ! 489: } ! 490: matched = 0; ! 491: ! 492: leave_instruction_column(oldcols); ! 493: ! 494: printf("addr:\t\tcount:\t\tsymbol:\n"); ! 495: for (end = sort_arr + active; sort_arr < end; sort_arr++) { ! 496: ! 497: addr = index2address(*sort_arr); ! 498: name = Symbols_GetByCpuAddress(addr); ! 499: if (!name) { ! 500: continue; ! 501: } ! 502: count = data[*sort_arr].count; ! 503: percentage = 100.0*count/cpu_profile.all.count; ! 504: printf("0x%06x\t%5.2f%%\t%d\t%s%s\t", ! 505: addr, percentage, count, name, ! 506: count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : ""); ! 507: Disasm(stdout, addr, &nextpc, 1); ! 508: ! 509: matched++; ! 510: if (matched >= show || matched >= symbols) { ! 511: break; ! 512: } ! 513: } ! 514: printf("%d CPU symbols listed.\n", matched); ! 515: ! 516: Disasm_SetColumns(oldcols); ! 517: } ! 518: ! 519: ! 520: static const char * addr2name(Uint32 addr, Uint64 *total) ! 521: { ! 522: Uint32 idx = address2index(addr); ! 523: *total = cpu_profile.data[idx].count; ! 524: return Symbols_GetByCpuAddress(addr); ! 525: } ! 526: ! 527: /** ! 528: * Output CPU callers info to given file. ! 529: */ ! 530: void Profile_CpuShowCallers(FILE *fp) ! 531: { ! 532: Profile_ShowCallers(fp, cpu_callinfo.sites, cpu_callinfo.site, addr2name); ! 533: } ! 534: ! 535: /** ! 536: * Save CPU profile information to given file. ! 537: */ ! 538: void Profile_CpuSave(FILE *out) ! 539: { ! 540: Uint32 text; ! 541: fputs("Field names:\tExecuted instructions, Used cycles, Instruction cache misses\n", out); ! 542: /* (Python) pegexp that matches address and all describled fields from disassembly: ! 543: * $<hex> : <ASM> <percentage>% (<count>, <cycles>, <misses>) ! 544: * $e5af38 : rts 0.00% (12, 0, 12) ! 545: */ ! 546: fputs("Field regexp:\t^\\$([0-9a-f]+) :.*% \\((.*)\\)$\n", out); ! 547: /* some information for interpreting the addresses */ ! 548: fprintf(out, "ROM_TOS:\t0x%06x-0x%06x\n", TosAddress, TosAddress + TosSize); ! 549: text = DebugInfo_GetTEXT(); ! 550: if (text < TosAddress) { ! 551: fprintf(out, "PROGRAM_TEXT:\t0x%06x-0x%06x\n", text, DebugInfo_GetTEXTEnd()); ! 552: } ! 553: fprintf(out, "CARTRIDGE:\t0xfa0000-0xfc0000\n"); ! 554: Profile_CpuShowAddresses(0, 0xFC0000-2, out); ! 555: Profile_CpuShowCallers(out); ! 556: } ! 557: ! 558: /* ------------------ CPU profile control ----------------- */ ! 559: ! 560: /** ! 561: * Initialize CPU profiling when necessary. Return true if profiling. ! 562: */ ! 563: bool Profile_CpuStart(void) ! 564: { ! 565: int size; ! 566: ! 567: Profile_FreeCallinfo(&(cpu_callinfo)); ! 568: if (cpu_profile.sort_arr) { ! 569: /* remove previous results */ ! 570: free(cpu_profile.sort_arr); ! 571: free(cpu_profile.data); ! 572: cpu_profile.sort_arr = NULL; ! 573: cpu_profile.data = NULL; ! 574: printf("Freed previous CPU profile buffers.\n"); ! 575: } ! 576: if (!cpu_profile.enabled) { ! 577: return false; ! 578: } ! 579: /* zero everything */ ! 580: memset(&cpu_profile, 0, sizeof(cpu_profile)); ! 581: ! 582: /* Shouldn't change within same debug session */ ! 583: size = (STRamEnd + 0x20000 + TosSize) / 2; ! 584: ! 585: /* Add one entry for catching invalid PC values */ ! 586: cpu_profile.data = calloc(size + 1, sizeof(*cpu_profile.data)); ! 587: if (!cpu_profile.data) { ! 588: perror("ERROR, new CPU profile buffer alloc failed"); ! 589: return false; ! 590: } ! 591: printf("Allocated CPU profile buffer (%d MB).\n", ! 592: (int)sizeof(*cpu_profile.data)*size/(1024*1024)); ! 593: cpu_profile.size = size; ! 594: ! 595: Profile_AllocCallinfo(&(cpu_callinfo), Symbols_CpuCount(), "CPU"); ! 596: ! 597: /* special hack for EmuTOS */ ! 598: etos_switcher = PC_UNDEFINED; ! 599: if (cpu_callinfo.sites && bIsEmuTOS && ! 600: (!Symbols_GetCpuAddress(SYMTYPE_TEXT, "_switchto", &etos_switcher) || etos_switcher < TosAddress)) { ! 601: etos_switcher = PC_UNDEFINED; ! 602: } ! 603: ! 604: cpu_profile.prev_cycles = Cycles_GetCounter(CYCLES_COUNTER_CPU); ! 605: cpu_profile.prev_family = OpcodeFamily; ! 606: cpu_profile.prev_pc = M68000_GetPC() & 0xffffff; ! 607: ! 608: cpu_profile.disasm_addr = 0; ! 609: cpu_profile.processed = false; ! 610: cpu_profile.enabled = true; ! 611: return cpu_profile.enabled; ! 612: } ! 613: ! 614: /** ! 615: * return true if pc could be next instruction for previous pc ! 616: */ ! 617: static bool is_prev_instr(Uint32 prev_pc, Uint32 pc) ! 618: { ! 619: /* just moved to next instruction (1-2 words)? */ ! 620: if (prev_pc < pc && (pc - prev_pc) <= 10) { ! 621: return true; ! 622: } ! 623: return false; ! 624: } ! 625: ! 626: /** ! 627: * return caller instruction type classification ! 628: */ ! 629: static calltype_t cpu_opcode_type(int family, Uint32 prev_pc, Uint32 pc) ! 630: { ! 631: switch (family) { ! 632: ! 633: case i_JSR: ! 634: case i_BSR: ! 635: return CALL_SUBROUTINE; ! 636: ! 637: case i_RTS: ! 638: case i_RTR: ! 639: case i_RTD: ! 640: return CALL_SUBRETURN; ! 641: ! 642: case i_JMP: /* often used also for "inlined" function calls... */ ! 643: case i_Bcc: /* both BRA & BCC */ ! 644: case i_FBcc: ! 645: case i_DBcc: ! 646: case i_FDBcc: ! 647: return CALL_BRANCH; ! 648: ! 649: case i_TRAP: ! 650: case i_TRAPV: ! 651: case i_TRAPcc: ! 652: case i_FTRAPcc: ! 653: case i_STOP: ! 654: case i_ILLG: ! 655: case i_CHK: ! 656: case i_CHK2: ! 657: case i_BKPT: ! 658: return CALL_EXCEPTION; ! 659: ! 660: case i_RTE: ! 661: return CALL_EXCRETURN; ! 662: } ! 663: /* just moved to next instruction? */ ! 664: if (is_prev_instr(prev_pc, pc)) { ! 665: return CALL_NEXT; ! 666: } ! 667: return CALL_UNKNOWN; ! 668: } ! 669: ! 670: /** ! 671: * If call tracking is enabled (there are symbols), collect ! 672: * information about subroutine and other calls, and their costs. ! 673: * ! 674: * Like with profile data, caller info checks need to be for previous ! 675: * instruction, that's why "pc" argument for this function actually ! 676: * needs to be previous PC. ! 677: */ ! 678: static void collect_calls(Uint32 pc, counters_t *counters) ! 679: { ! 680: calltype_t flag; ! 681: int idx, family; ! 682: Uint32 prev_pc, caller_pc; ! 683: ! 684: family = cpu_profile.prev_family; ! 685: cpu_profile.prev_family = OpcodeFamily; ! 686: ! 687: prev_pc = cpu_callinfo.prev_pc; ! 688: cpu_callinfo.prev_pc = pc; ! 689: caller_pc = PC_UNDEFINED; ! 690: ! 691: /* address is return address for last subroutine call? */ ! 692: if (unlikely(pc == cpu_callinfo.return_pc) && likely(cpu_callinfo.depth)) { ! 693: ! 694: flag = cpu_opcode_type(family, prev_pc, pc); ! 695: /* previous address can be exception return (RTE) if exception ! 696: * occurred right after returning from subroutine call (RTS) ! 697: */ ! 698: if (likely(flag == CALL_SUBRETURN || flag == CALL_EXCRETURN)) { ! 699: caller_pc = Profile_CallEnd(&cpu_callinfo, counters); ! 700: } else { ! 701: #if DEBUG ! 702: /* although at return address, it didn't return yet, ! 703: * e.g. because there was a jsr or jump to return address ! 704: */ ! 705: Uint32 nextpc; ! 706: fprintf(stderr, "WARNING: subroutine call returned 0x%x -> 0x%x, not though RTS!\n", prev_pc, pc); ! 707: Disasm(stderr, prev_pc, &nextpc, 1); ! 708: #endif ! 709: } ! 710: /* next address might be another function, so need to fall through */ ! 711: } ! 712: ! 713: /* address is one which we're tracking? */ ! 714: idx = Symbols_GetCpuAddressIndex(pc); ! 715: if (unlikely(idx >= 0)) { ! 716: ! 717: flag = cpu_opcode_type(family, prev_pc, pc); ! 718: if (flag == CALL_SUBROUTINE) { ! 719: /* special HACK for for EmuTOS AES switcher which ! 720: * changes stack content to remove itself from call ! 721: * stack and uses RTS for subroutine *calls*, not ! 722: * for returning from them. ! 723: * ! 724: * It wouldn't be reliable to detect calls from it, ! 725: * so I'm making call *to* it show up as branch, to ! 726: * keep callstack depth correct. ! 727: */ ! 728: if (unlikely(pc == etos_switcher)) { ! 729: flag = CALL_BRANCH; ! 730: } else if (unlikely(prev_pc == PC_UNDEFINED)) { ! 731: /* if first profiled instruction ! 732: * is subroutine call, it doesn't have ! 733: * valid prev_pc value stored ! 734: */ ! 735: cpu_callinfo.return_pc = PC_UNDEFINED; ! 736: fprintf(stderr, "WARNING: previous PC from callinfo for 0x%d is undefined!\n", pc); ! 737: #if DEBUG ! 738: skip_assert = true; ! 739: DebugUI(REASON_CPU_EXCEPTION); ! 740: #endif ! 741: } else { ! 742: /* slow! */ ! 743: cpu_callinfo.return_pc = Disasm_GetNextPC(prev_pc); ! 744: } ! 745: } else if (caller_pc != PC_UNDEFINED) { ! 746: /* returned from function to first instrction of another symbol: ! 747: * 0xf384 jsr some_function ! 748: * other_symbol: ! 749: * 0f3x8a some_instruction ! 750: * -> change return instruction address to ! 751: * address of what did the returned call. ! 752: */ ! 753: prev_pc = caller_pc; ! 754: assert(is_prev_instr(prev_pc, pc)); ! 755: flag = CALL_NEXT; ! 756: } ! 757: Profile_CallStart(idx, &cpu_callinfo, prev_pc, flag, pc, counters); ! 758: } ! 759: } ! 760: ! 761: /** ! 762: * Update CPU cycle and count statistics for PC address. ! 763: * ! 764: * This gets called after instruction has executed and PC ! 765: * has advanced to next instruction. ! 766: */ ! 767: void Profile_CpuUpdate(void) ! 768: { ! 769: counters_t *counters = &(cpu_profile.all); ! 770: Uint32 pc, prev_pc, idx, cycles, misses; ! 771: cpu_profile_item_t *prev; ! 772: ! 773: prev_pc = cpu_profile.prev_pc; ! 774: /* PC may have extra bits, they need to be masked away as ! 775: * emulation itself does that too when PC value is used ! 776: */ ! 777: cpu_profile.prev_pc = pc = M68000_GetPC() & 0xffffff; ! 778: ! 779: idx = address2index(prev_pc); ! 780: assert(idx <= cpu_profile.size); ! 781: prev = cpu_profile.data + idx; ! 782: ! 783: if (likely(prev->count < MAX_CPU_PROFILE_VALUE)) { ! 784: prev->count++; ! 785: } ! 786: ! 787: #if USE_CYCLES_COUNTER ! 788: /* Confusingly, with DSP enabled, cycle counter is for this instruction, ! 789: * without DSP enabled, it's a monotonically increasing counter. ! 790: */ ! 791: if (bDspEnabled) { ! 792: cycles = Cycles_GetCounter(CYCLES_COUNTER_CPU); ! 793: } else { ! 794: Uint32 newcycles = Cycles_GetCounter(CYCLES_COUNTER_CPU); ! 795: cycles = newcycles - cpu_profile.prev_cycles; ! 796: cpu_profile.prev_cycles = newcycles; ! 797: } ! 798: #else ! 799: cycles = CurrentInstrCycles + nWaitStateCycles; ! 800: #endif ! 801: /* cycles are based on 8Mhz clock, change them to correct one */ ! 802: cycles <<= nCpuFreqShift; ! 803: ! 804: if (likely(prev->cycles < MAX_CPU_PROFILE_VALUE - cycles)) { ! 805: prev->cycles += cycles; ! 806: } else { ! 807: prev->cycles = MAX_CPU_PROFILE_VALUE; ! 808: } ! 809: ! 810: #if ENABLE_WINUAE_CPU ! 811: misses = CpuInstruction.iCacheMisses; ! 812: assert(misses < MAX_MISS); ! 813: cpu_profile.miss_counts[misses]++; ! 814: if (likely(prev->misses < MAX_CPU_PROFILE_VALUE - misses)) { ! 815: prev->misses += misses; ! 816: } else { ! 817: prev->misses = MAX_CPU_PROFILE_VALUE; ! 818: } ! 819: #else ! 820: misses = 0; ! 821: #endif ! 822: if (cpu_callinfo.sites) { ! 823: collect_calls(prev_pc, counters); ! 824: } ! 825: /* counters are increased after caller info is processed, ! 826: * otherwise cost for the instruction calling the callee ! 827: * doesn't get accounted to caller (but callee). ! 828: */ ! 829: counters->misses += misses; ! 830: counters->cycles += cycles; ! 831: counters->count++; ! 832: ! 833: #if DEBUG ! 834: if (unlikely(OpcodeFamily == 0)) { ! 835: Uint32 nextpc; ! 836: fputs("WARNING: instruction opcode family is zero (=i_ILLG) for instruction:\n", stderr); ! 837: Disasm(stderr, prev_pc, &nextpc, 1); ! 838: } ! 839: /* catch too large (and negative) cycles for other than STOP instruction */ ! 840: if (unlikely(cycles > 512 && OpcodeFamily != i_STOP)) { ! 841: Uint32 nextpc; ! 842: fprintf(stderr, "WARNING: cycles %d > 512:\n", cycles); ! 843: Disasm(stderr, prev_pc, &nextpc, 1); ! 844: } ! 845: if (unlikely(cycles == 0)) { ! 846: Uint32 nextpc; ! 847: fputs("WARNING: Zero cycles for an opcode:\n", stderr); ! 848: Disasm(stderr, prev_pc, &nextpc, 1); ! 849: } ! 850: #endif ! 851: } ! 852: ! 853: ! 854: /** ! 855: * Helper for accounting CPU profile area item. ! 856: */ ! 857: static void update_area_item(profile_area_t *area, Uint32 addr, cpu_profile_item_t *item) ! 858: { ! 859: Uint32 cycles = item->cycles; ! 860: Uint32 count = item->count; ! 861: ! 862: if (!count) { ! 863: return; ! 864: } ! 865: area->counters.count += count; ! 866: area->counters.misses += item->misses; ! 867: area->counters.cycles += cycles; ! 868: ! 869: if (cycles == MAX_CPU_PROFILE_VALUE) { ! 870: area->overflow = true; ! 871: } ! 872: if (addr < area->lowest) { ! 873: area->lowest = addr; ! 874: } ! 875: area->highest = addr; ! 876: ! 877: area->active++; ! 878: } ! 879: ! 880: /** ! 881: * Helper for collecting CPU profile area statistics. ! 882: */ ! 883: static Uint32 update_area(profile_area_t *area, Uint32 start, Uint32 end) ! 884: { ! 885: cpu_profile_item_t *item; ! 886: Uint32 addr; ! 887: ! 888: memset(area, 0, sizeof(profile_area_t)); ! 889: area->lowest = cpu_profile.size; ! 890: ! 891: item = &(cpu_profile.data[start]); ! 892: for (addr = start; addr < end; addr++, item++) { ! 893: update_area_item(area, addr, item); ! 894: } ! 895: return addr; ! 896: } ! 897: ! 898: /** ! 899: * Helper for initializing CPU profile area sorting indexes. ! 900: */ ! 901: static Uint32* index_area(profile_area_t *area, Uint32 *sort_arr) ! 902: { ! 903: cpu_profile_item_t *item; ! 904: Uint32 addr; ! 905: ! 906: item = &(cpu_profile.data[area->lowest]); ! 907: for (addr = area->lowest; addr <= area->highest; addr++, item++) { ! 908: if (item->count) { ! 909: *sort_arr++ = addr; ! 910: } ! 911: } ! 912: return sort_arr; ! 913: } ! 914: ! 915: /** ! 916: * Stop and process the CPU profiling data; collect stats and ! 917: * prepare for more optimal sorting. ! 918: */ ! 919: void Profile_CpuStop(void) ! 920: { ! 921: Uint32 *sort_arr, next; ! 922: int active; ! 923: ! 924: if (cpu_profile.processed || !cpu_profile.enabled) { ! 925: return; ! 926: } ! 927: /* user didn't change RAM or TOS size in the meanwhile? */ ! 928: assert(cpu_profile.size == (STRamEnd + 0x20000 + TosSize) / 2); ! 929: ! 930: Profile_FinalizeCalls(&(cpu_callinfo), &(cpu_profile.all), Symbols_GetByCpuAddress); ! 931: ! 932: /* find lowest and highest addresses executed etc */ ! 933: next = update_area(&cpu_profile.ram, 0, STRamEnd/2); ! 934: next = update_area(&cpu_profile.tos, next, (STRamEnd + TosSize)/2); ! 935: next = update_area(&cpu_profile.rom, next, cpu_profile.size); ! 936: assert(next == cpu_profile.size); ! 937: ! 938: #if DEBUG ! 939: if (skip_assert) { ! 940: skip_assert = false; ! 941: } else ! 942: #endif ! 943: { ! 944: assert(cpu_profile.all.misses == cpu_profile.ram.counters.misses + cpu_profile.tos.counters.misses + cpu_profile.rom.counters.misses); ! 945: assert(cpu_profile.all.cycles == cpu_profile.ram.counters.cycles + cpu_profile.tos.counters.cycles + cpu_profile.rom.counters.cycles); ! 946: assert(cpu_profile.all.count == cpu_profile.ram.counters.count + cpu_profile.tos.counters.count + cpu_profile.rom.counters.count); ! 947: } ! 948: ! 949: /* allocate address array for sorting */ ! 950: active = cpu_profile.ram.active + cpu_profile.rom.active + cpu_profile.tos.active; ! 951: sort_arr = calloc(active, sizeof(*sort_arr)); ! 952: ! 953: if (!sort_arr) { ! 954: perror("ERROR: allocating CPU profile address data"); ! 955: free(cpu_profile.data); ! 956: cpu_profile.data = NULL; ! 957: return; ! 958: } ! 959: printf("Allocated CPU profile address buffer (%d KB).\n", ! 960: (int)sizeof(*sort_arr)*(active+512)/1024); ! 961: cpu_profile.sort_arr = sort_arr; ! 962: cpu_profile.active = active; ! 963: ! 964: /* and fill addresses for used instructions... */ ! 965: sort_arr = index_area(&cpu_profile.ram, sort_arr); ! 966: sort_arr = index_area(&cpu_profile.tos, sort_arr); ! 967: sort_arr = index_area(&cpu_profile.rom, sort_arr); ! 968: assert(sort_arr == cpu_profile.sort_arr + cpu_profile.active); ! 969: //printf("%d/%d/%d\n", area->active, sort_arr-cpu_profile.sort_arr, active); ! 970: ! 971: Profile_CpuShowStats(); ! 972: cpu_profile.processed = true; ! 973: } ! 974: ! 975: /** ! 976: * Get pointers to CPU profile enabling and disasm address variables ! 977: * for updating them (in parser). ! 978: */ ! 979: void Profile_CpuGetPointers(bool **enabled, Uint32 **disasm_addr) ! 980: { ! 981: *disasm_addr = &cpu_profile.disasm_addr; ! 982: *enabled = &cpu_profile.enabled; ! 983: } ! 984: ! 985: /** ! 986: * Get callinfo & symbol search pointers for stack walking. ! 987: */ ! 988: void Profile_CpuGetCallinfo(callinfo_t **callinfo, const char* (**get_symbol)(Uint32)) ! 989: { ! 990: *callinfo = &(cpu_callinfo); ! 991: *get_symbol = Symbols_GetByCpuAddress; ! 992: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.