|
|
1.1 root 1: /*
2: * Hatari - profilecpu.c
3: *
1.1.1.3 root 4: * Copyright (C) 2010-2015 by Eero Tamminen
1.1 root 5: *
6: * This file is distributed under the GNU General Public License, version 2
7: * or at your option any later version. Read the file gpl.txt for details.
8: *
9: * profilecpu.c - functions for profiling CPU and showing the results.
10: */
/* identification string embedding the compile date and time of this file */
const char Profilecpu_fileid[] = "Hatari profilecpu.c : " __DATE__ " " __TIME__;
12:
13: #include <stdio.h>
14: #include <inttypes.h>
15: #include <assert.h>
16: #include "main.h"
17: #include "configuration.h"
18: #include "clocks_timings.h"
19: #include "debugInfo.h"
20: #include "dsp.h"
21: #include "m68000.h"
22: #include "68kDisass.h"
1.1.1.5 ! root 23: #include "symbols.h"
1.1 root 24: #include "profile.h"
25: #include "profile_priv.h"
1.1.1.5 ! root 26: #include "debug_priv.h"
1.1 root 27: #include "stMemory.h"
28: #include "tos.h"
1.1.1.2 root 29: #include "screen.h"
30: #include "video.h"
31:
32:
/* cartridge ROM address window: CART_START (inclusive) to CART_END (exclusive) */
#define CART_START	0xFA0000
#define CART_END	0xFC0000
#define CART_SIZE	(CART_END - CART_START)

/* first address of the TT-RAM area */
#define TTRAM_START	0x01000000
1.1 root 39:
40: /* if non-zero, output (more) warnings on suspicious:
41: * - cycle/instruction counts
42: * - PC switches
1.1.1.3 root 43: * And drop to debugger on invalid current & previous PC addresses.
44: *
45: * NOTE: DebugUI() calls that DEBUG define enables, can cause
46: * instruction count mismatch assertions because debugger invocation
47: * resets the counters AND happens in middle of data collection.
48: * It's best to quit after debugging the issue ('q' command).
1.1 root 49: */
50: #define DEBUG 0
51: #if DEBUG
52: #include "debugui.h"
53: static bool skip_assert;
54: #endif
55:
1.1.1.5 ! root 56: /* whether to track & show all cache stats for all instructions */
! 57: #define DEBUG_CACHE 0
! 58:
! 59:
1.1 root 60: static callinfo_t cpu_callinfo;
61:
62: #define MAX_CPU_PROFILE_VALUE 0xFFFFFFFF
63:
64: typedef struct {
1.1.1.5 ! root 65: Uint32 count; /* how many times this address instruction is executed */
1.1 root 66: Uint32 cycles; /* how many CPU cycles was taken at this address */
1.1.1.5 ! root 67: #if DEBUG_CACHE /* track also less relevant cache events */
! 68: Uint32 i_hits; /* how many CPU i-cache hits happened at this address */
! 69: Uint32 d_misses; /* how many CPU d-cache misses happened at this address */
! 70: #endif
! 71: Uint32 i_misses; /* how many CPU i-cache misses happened at this address */
! 72: Uint32 d_hits; /* how many CPU d-cache hits happened at this address */
1.1 root 73: } cpu_profile_item_t;
74:
1.1.1.5 ! root 75:
! 76: /* max count of hits/misses single instruction can trigger at once */
1.1.1.3 root 77: #define MAX_I_HITS 8
78: #define MAX_I_MISSES 8
79: #define MAX_D_HITS 32
80: #define MAX_D_MISSES 20
1.1 root 81:
82: static struct {
83: counters_t all; /* total counts for all areas */
84: cpu_profile_item_t *data; /* profile data items */
85: Uint32 size; /* number of allocated profile data items */
1.1.1.3 root 86: profile_area_t ttram; /* TT-RAM stats */
1.1 root 87: profile_area_t ram; /* normal RAM stats */
88: profile_area_t rom; /* cartridge ROM stats */
89: profile_area_t tos; /* ROM TOS stats */
90: int active; /* number of active data items in all areas */
91: Uint32 *sort_arr; /* data indexes used for sorting */
1.1.1.2 root 92: int prev_family; /* previous instruction opcode family */
1.1.1.3 root 93: Uint64 prev_cycles; /* previous instruction cycles counter */
1.1 root 94: Uint32 prev_pc; /* previous instruction address */
1.1.1.2 root 95: Uint32 loop_start; /* address of last loop start */
96: Uint32 loop_end; /* address of last loop end */
97: Uint32 loop_count; /* how many times it was looped */
1.1 root 98: Uint32 disasm_addr; /* 'addresses' command start address */
1.1.1.3 root 99: #if ENABLE_WINUAE_CPU
1.1.1.5 ! root 100: Uint32 i_prefetched; /* instructions that don't incur prefetch hit/miss */
1.1.1.3 root 101: Uint32 i_hit_counts[MAX_I_HITS]; /* I-cache hit counts */
102: Uint32 d_hit_counts[MAX_D_HITS]; /* D-cache hit counts */
103: Uint32 i_miss_counts[MAX_I_MISSES]; /* I-cache miss counts */
104: Uint32 d_miss_counts[MAX_D_MISSES]; /* D-cache miss counts */
105: #endif
1.1 root 106: bool processed; /* true when data is already processed */
107: bool enabled; /* true when profiling enabled */
108: } cpu_profile;
109:
110: /* special hack for EmuTOS */
111: static Uint32 etos_switcher;
112:
113:
114: /* ------------------ CPU profile address mapping ----------------- */
115:
116: /**
117: * convert Atari memory address to sorting array profile data index.
118: */
119: static inline Uint32 address2index(Uint32 pc)
120: {
121: if (unlikely(pc & 1)) {
122: fprintf(stderr, "WARNING: odd CPU profile instruction address 0x%x!\n", pc);
123: #if DEBUG
124: skip_assert = true;
125: DebugUI(REASON_CPU_EXCEPTION);
126: #endif
127: }
1.1.1.2 root 128: if (pc < STRamEnd) {
129: /* most likely case, use RAM address as-is */
130:
131: } else if (pc >= TosAddress && pc < TosAddress + TosSize) {
1.1 root 132: /* TOS, put it after RAM data */
133: pc = pc - TosAddress + STRamEnd;
1.1.1.2 root 134: if (TosAddress >= CART_END) {
135: /* and after cartridge data as it's higher */
136: pc += CART_SIZE;
137: }
138: } else if (pc >= CART_START && pc < CART_END) {
139: /* ROM, put it after RAM data */
140: pc = pc - CART_START + STRamEnd;
141: if (TosAddress < CART_START) {
142: /* and after TOS as it's higher */
143: pc += TosSize;
144: }
1.1.1.3 root 145: #if ENABLE_WINUAE_CPU
1.1.1.5 ! root 146: } else if (TTmemory && pc >= TTRAM_START && pc < TTRAM_START + 1024*(unsigned)ConfigureParams.Memory.TTRamSize_KB) {
1.1.1.3 root 147: pc += STRamEnd + TosSize + CART_SIZE - TTRAM_START;
148: #endif
1.1 root 149: } else {
1.1.1.2 root 150: fprintf(stderr, "WARNING: 'invalid' CPU PC profile instruction address 0x%x!\n", pc);
151: /* extra entry at end is reserved for invalid PC values */
1.1.1.3 root 152: pc = STRamEnd + TosSize + CART_SIZE;
1.1 root 153: #if DEBUG
1.1.1.2 root 154: skip_assert = true;
155: DebugUI(REASON_CPU_EXCEPTION);
1.1 root 156: #endif
157: }
158: /* CPU instructions are at even addresses, save space by halving */
159: return (pc >> 1);
160: }
161:
162: /**
163: * convert sorting array profile data index to Atari memory address.
164: */
165: static Uint32 index2address(Uint32 idx)
166: {
167: idx <<= 1;
168: /* RAM */
169: if (idx < STRamEnd) {
170: return idx;
171: }
172: idx -= STRamEnd;
1.1.1.2 root 173: /* TOS before cartridge area? */
174: if (TosAddress < CART_START) {
175: /* TOS */
176: if (idx < TosSize) {
177: return idx + TosAddress;
178: }
179: idx -= TosSize;
180: /* ROM */
1.1.1.3 root 181: if (idx < CART_SIZE) {
182: return idx + CART_START;
183: }
184: idx -= CART_SIZE;
1.1.1.2 root 185: } else {
186: /* ROM */
187: if (idx < CART_SIZE) {
188: return idx + CART_START;
189: }
190: idx -= CART_SIZE;
191: /* TOS */
1.1.1.3 root 192: if (idx < TosSize) {
193: return idx + TosAddress;
194: }
195: idx -= TosSize;
1.1 root 196: }
1.1.1.3 root 197: return idx + TTRAM_START;
1.1 root 198: }
199:
200: /* ------------------ CPU profile results ----------------- */
201:
202: /**
1.1.1.5 ! root 203: * Write string containing CPU cache stats, cycles, count, count percentage
! 204: * for given address to provided buffer.
! 205: *
1.1 root 206: * Return true if data was available and non-zero, false otherwise.
207: */
1.1.1.5 ! root 208: bool Profile_CpuAddressDataStr(char *buffer, size_t maxlen, Uint32 addr)
1.1 root 209: {
1.1.1.5 ! root 210: cpu_profile_item_t *item;
! 211: float percentage;
1.1 root 212: Uint32 idx;
1.1.1.5 ! root 213:
! 214: assert(buffer && maxlen);
1.1 root 215: if (!cpu_profile.data) {
216: return false;
217: }
218: idx = address2index(addr);
1.1.1.5 ! root 219: item = &(cpu_profile.data[idx]);
! 220:
1.1 root 221: if (cpu_profile.all.count) {
1.1.1.5 ! root 222: percentage = 100.0 * item->count / cpu_profile.all.count;
1.1 root 223: } else {
1.1.1.5 ! root 224: percentage = 0.0;
1.1 root 225: }
1.1.1.5 ! root 226: #if DEBUG_CACHE
! 227: snprintf(buffer, maxlen, "%5.2f%% (%u, %u, %u, %u, %u, %u)",
! 228: percentage, item->count, item->cycles,
! 229: item->i_hits, item->i_misses,
! 230: item->d_hits, item->d_misses);
! 231: #else
! 232: snprintf(buffer, maxlen, "%5.2f%% (%u, %u, %u, %u)",
! 233: percentage, item->count, item->cycles,
! 234: item->i_misses, item->d_hits);
! 235: #endif
! 236: return (item->count > 0);
1.1 root 237: }
238:
239: /**
240: * Helper to show statistics for specified CPU profile area.
241: */
242: static void show_cpu_area_stats(profile_area_t *area)
243: {
244: if (!area->active) {
245: fprintf(stderr, "- no activity\n");
246: return;
247: }
248: fprintf(stderr, "- active address range:\n 0x%06x-0x%06x\n",
249: index2address(area->lowest),
250: index2address(area->highest));
1.1.1.5 ! root 251: fprintf(stderr, "- active instruction addresses:\n %d (%.2f%% of all areas)\n",
1.1 root 252: area->active,
253: 100.0 * area->active / cpu_profile.active);
1.1.1.5 ! root 254: fprintf(stderr, "- executed instructions:\n %"PRIu64" (%.2f%% of all areas)\n",
1.1 root 255: area->counters.count,
256: 100.0 * area->counters.count / cpu_profile.all.count);
1.1.1.3 root 257: /* CPU cache in use? */
258: if (cpu_profile.all.i_misses) {
1.1.1.5 ! root 259: fprintf(stderr, "- instruction cache misses:\n %"PRIu64" (%.2f%% of all areas)\n",
1.1.1.3 root 260: area->counters.i_misses,
261: 100.0 * area->counters.i_misses / cpu_profile.all.i_misses);
262: }
263: if (cpu_profile.all.d_hits) {
1.1.1.5 ! root 264: fprintf(stderr, "- data cache hits:\n %"PRIu64" (%.2f%% of all areas)\n",
1.1.1.3 root 265: area->counters.d_hits,
266: 100.0 * area->counters.d_hits / cpu_profile.all.d_hits);
1.1 root 267: }
1.1.1.5 ! root 268: fprintf(stderr, "- used cycles:\n %"PRIu64" (%.2f%% of all areas)\n = %.5fs\n",
1.1 root 269: area->counters.cycles,
270: 100.0 * area->counters.cycles / cpu_profile.all.cycles,
1.1.1.5 ! root 271: (double)area->counters.cycles / MachineClocks.CPU_Freq_Emul);
1.1 root 272: if (area->overflow) {
273: fprintf(stderr, " *** COUNTER OVERFLOW! ***\n");
274: }
275: }
276:
277:
278: /**
279: * show CPU area (RAM, ROM, TOS) specific statistics.
280: */
281: void Profile_CpuShowStats(void)
282: {
283: fprintf(stderr, "Normal RAM (0-0x%X):\n", STRamEnd);
284: show_cpu_area_stats(&cpu_profile.ram);
285:
286: fprintf(stderr, "ROM TOS (0x%X-0x%X):\n", TosAddress, TosAddress + TosSize);
287: show_cpu_area_stats(&cpu_profile.tos);
288:
1.1.1.2 root 289: fprintf(stderr, "Cartridge ROM (0x%X-%X):\n", CART_START, CART_END);
1.1 root 290: show_cpu_area_stats(&cpu_profile.rom);
291:
1.1.1.5 ! root 292: if (TTmemory && ConfigureParams.Memory.TTRamSize_KB) {
! 293: fprintf(stderr, "TT-RAM (0x%X-%X):\n", TTRAM_START, TTRAM_START + 1024*ConfigureParams.Memory.TTRamSize_KB);
1.1.1.3 root 294: show_cpu_area_stats(&cpu_profile.ttram);
295: }
296:
1.1 root 297: fprintf(stderr, "\n= %.5fs\n",
1.1.1.5 ! root 298: (double)cpu_profile.all.cycles / MachineClocks.CPU_Freq_Emul);
1.1.1.3 root 299: }
1.1 root 300:
301: #if ENABLE_WINUAE_CPU
1.1.1.3 root 302: /**
303: * show percentage histogram of given array items
304: */
305: static void show_histogram(const char *title, int count, Uint32 *items)
306: {
1.1.1.5 ! root 307: const Uint64 maxval = cpu_profile.all.count;
1.1.1.3 root 308: Uint32 value;
309: int i;
310:
1.1.1.5 ! root 311: fprintf(stderr, "\n%s, number of occurrences:\n", title);
1.1.1.3 root 312: for (i = 0; i < count; i++) {
313: value = items[i];
314: if (value) {
315: int w, width = 50 * value / maxval+1;
316: fprintf(stderr, " %2d: ", i);
317: for (w = 0; w < width; w++) {
318: fputc('#', stderr);
319: }
320: fprintf(stderr, " %.3f%%\n", 100.0 * value / maxval);
1.1 root 321: }
322: }
323: }
324:
325: /**
1.1.1.3 root 326: * show CPU cache usage histograms
327: */
328: void Profile_CpuShowCaches(void)
329: {
330: if (!(cpu_profile.all.i_misses || cpu_profile.all.d_hits)) {
1.1.1.5 ! root 331: fprintf(stderr, "No instruction/data cache information.\n");
1.1.1.3 root 332: return;
333: }
1.1.1.5 ! root 334: fprintf(stderr,
! 335: "\nNote:\n"
! 336: "- these statistics include all profiled instructions, but\n"
! 337: "- instruction cache events happen only on prefetch/branch\n"
! 338: "- data cache events can happen only for instructions that do memory reads\n"
! 339: "\nAlready prefetched instructions: %.3f%% (no hits/misses)\n",
! 340: 100.0 * cpu_profile.i_prefetched / cpu_profile.all.count);
! 341:
1.1.1.3 root 342: show_histogram("Instruction cache hits per instruction",
1.1.1.4 root 343: ARRAY_SIZE(cpu_profile.i_hit_counts), cpu_profile.i_hit_counts);
1.1.1.3 root 344: show_histogram("Instruction cache misses per instruction",
1.1.1.4 root 345: ARRAY_SIZE(cpu_profile.i_miss_counts), cpu_profile.i_miss_counts);
1.1.1.3 root 346: show_histogram("Data cache hits per instruction",
1.1.1.4 root 347: ARRAY_SIZE(cpu_profile.d_hit_counts), cpu_profile.d_hit_counts);
1.1.1.3 root 348: show_histogram("Data cache misses per instruction",
1.1.1.4 root 349: ARRAY_SIZE(cpu_profile.d_miss_counts), cpu_profile.d_miss_counts);
1.1.1.3 root 350: }
351: #else
/* variant for builds without WinUAE CPU core: just tell that there's no data */
void Profile_CpuShowCaches(void)
{
	fputs("Cache information is recorded only with WinUAE CPU.\n", stderr);
}
355: #endif
356:
357: /**
1.1 root 358: * Show CPU instructions which execution was profiled, in the address order,
359: * starting from the given address. Return next disassembly address.
360: */
1.1.1.5 ! root 361: Uint32 Profile_CpuShowAddresses(Uint32 lower, Uint32 upper, FILE *out, paging_t use_paging)
1.1 root 362: {
363: int oldcols[DISASM_COLUMNS], newcols[DISASM_COLUMNS];
1.1.1.5 ! root 364: int show, shown, addrs, active;
1.1 root 365: const char *symbol;
366: cpu_profile_item_t *data;
367: Uint32 idx, end, size;
368: uaecptr nextpc, addr;
369:
370: data = cpu_profile.data;
371: if (!data) {
372: fprintf(stderr, "ERROR: no CPU profiling data available!\n");
373: return 0;
374: }
375:
376: size = cpu_profile.size;
377: active = cpu_profile.active;
378: if (upper) {
379: end = address2index(upper);
380: show = active;
381: if (end > size) {
382: end = size;
383: }
384: } else {
385: end = size;
1.1.1.5 ! root 386: show = DebugUI_GetPageLines(ConfigureParams.Debugger.nDisasmLines, 0);
1.1 root 387: if (!show || show > active) {
388: show = active;
389: }
390: }
1.1.1.5 ! root 391: if (use_paging == PAGING_DISABLED) {
! 392: show = INT_MAX;
! 393: }
1.1 root 394:
395: /* get/change columns */
396: Disasm_GetColumns(oldcols);
397: Disasm_DisableColumn(DISASM_COLUMN_HEXDUMP, oldcols, newcols);
398: Disasm_SetColumns(newcols);
399:
1.1.1.3 root 400: fputs("# disassembly with profile data: <instructions percentage>% (<sum of instructions>, <sum of cycles>, <sum of i-cache misses>, <sum of d-cache hits>)\n", out);
1.1.1.5 ! root 401: shown = 2; /* first and last printf */
1.1 root 402:
1.1.1.5 ! root 403: addrs = nextpc = 0;
1.1 root 404: idx = address2index(lower);
1.1.1.5 ! root 405: for (; shown < show && idx < end; idx++) {
1.1 root 406: if (!data[idx].count) {
407: continue;
408: }
409: addr = index2address(idx);
410: if (addr != nextpc && nextpc) {
411: fprintf(out, "[...]\n");
1.1.1.5 ! root 412: shown++;
1.1 root 413: }
1.1.1.5 ! root 414: symbol = Symbols_GetByCpuAddress(addr, SYMTYPE_TEXT);
1.1 root 415: if (symbol) {
416: fprintf(out, "%s:\n", symbol);
1.1.1.5 ! root 417: shown++;
1.1 root 418: }
419: /* NOTE: column setup works only with 68kDisass disasm engine! */
420: Disasm(out, addr, &nextpc, 1);
421: shown++;
1.1.1.5 ! root 422: addrs++;
1.1 root 423: }
1.1.1.5 ! root 424: printf("Disassembled %d (of active %d) CPU addresses.\n", addrs, active);
1.1 root 425:
426: /* restore disassembly columns */
427: Disasm_SetColumns(oldcols);
428: return nextpc;
429: }
430:
431: /**
432: * remove all disassembly columns except instruction ones.
433: * data needed to restore columns is stored to "oldcols"
434: */
435: static void leave_instruction_column(int *oldcols)
436: {
437: int i, newcols[DISASM_COLUMNS];
438:
439: Disasm_GetColumns(oldcols);
440: for (i = 0; i < DISASM_COLUMNS; i++) {
441: if (i == DISASM_COLUMN_OPCODE || i == DISASM_COLUMN_OPERAND) {
442: continue;
443: }
444: Disasm_DisableColumn(i, oldcols, newcols);
445: oldcols = newcols;
446: }
447: Disasm_SetColumns(newcols);
448: }
449:
450: #if ENABLE_WINUAE_CPU
451: /**
452: * compare function for qsort() to sort CPU profile data by instruction cache misses.
453: */
1.1.1.3 root 454: static int cmp_cpu_i_misses(const void *p1, const void *p2)
1.1 root 455: {
1.1.1.3 root 456: Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].i_misses;
457: Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].i_misses;
1.1 root 458: if (count1 > count2) {
459: return -1;
460: }
461: if (count1 < count2) {
462: return 1;
463: }
464: return 0;
465: }
466:
467: /**
468: * Sort CPU profile data addresses by instruction cache misses and show the results.
469: */
1.1.1.3 root 470: void Profile_CpuShowInstrMisses(int show)
471: {
472: int active;
473: int oldcols[DISASM_COLUMNS];
474: Uint32 *sort_arr, *end, addr, nextpc;
475: cpu_profile_item_t *data = cpu_profile.data;
476: float percentage;
477: Uint32 count;
478:
479: if (!cpu_profile.all.i_misses) {
480: fprintf(stderr, "No CPU instruction cache miss information available.\n");
481: return;
482: }
483:
484: active = cpu_profile.active;
485: sort_arr = cpu_profile.sort_arr;
486: qsort(sort_arr, active, sizeof(*sort_arr), cmp_cpu_i_misses);
487:
488: leave_instruction_column(oldcols);
489:
490: printf("addr:\t\ti-cache misses:\n");
491: show = (show < active ? show : active);
492: for (end = sort_arr + show; sort_arr < end; sort_arr++) {
493: addr = index2address(*sort_arr);
494: count = data[*sort_arr].i_misses;
495: percentage = 100.0*count/cpu_profile.all.i_misses;
496: printf("0x%06x\t%5.2f%%\t%d%s\t", addr, percentage, count,
497: count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
498: Disasm(stdout, addr, &nextpc, 1);
499: }
500: printf("%d CPU addresses listed.\n", show);
501:
502: Disasm_SetColumns(oldcols);
503: }
504:
505: /**
506: * compare function for qsort() to sort CPU profile data by data cache hits.
507: */
508: static int cmp_cpu_d_hits(const void *p1, const void *p2)
509: {
510: Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].d_hits;
511: Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].d_hits;
512: if (count1 > count2) {
513: return -1;
514: }
515: if (count1 < count2) {
516: return 1;
517: }
518: return 0;
519: }
520:
521: /**
522: * Sort CPU profile data addresses by data cache hits and show the results.
523: */
524: void Profile_CpuShowDataHits(int show)
1.1 root 525: {
526: int active;
527: int oldcols[DISASM_COLUMNS];
528: Uint32 *sort_arr, *end, addr, nextpc;
529: cpu_profile_item_t *data = cpu_profile.data;
530: float percentage;
531: Uint32 count;
532:
1.1.1.3 root 533: if (!cpu_profile.all.d_hits) {
534: fprintf(stderr, "No CPU data cache hit information available.\n");
1.1 root 535: return;
536: }
537:
538: active = cpu_profile.active;
539: sort_arr = cpu_profile.sort_arr;
1.1.1.3 root 540: qsort(sort_arr, active, sizeof(*sort_arr), cmp_cpu_d_hits);
1.1 root 541:
542: leave_instruction_column(oldcols);
543:
1.1.1.3 root 544: printf("addr:\t\td-cache hits:\n");
1.1 root 545: show = (show < active ? show : active);
546: for (end = sort_arr + show; sort_arr < end; sort_arr++) {
547: addr = index2address(*sort_arr);
1.1.1.3 root 548: count = data[*sort_arr].d_hits;
549: percentage = 100.0*count/cpu_profile.all.d_hits;
1.1 root 550: printf("0x%06x\t%5.2f%%\t%d%s\t", addr, percentage, count,
551: count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
552: Disasm(stdout, addr, &nextpc, 1);
553: }
554: printf("%d CPU addresses listed.\n", show);
555:
556: Disasm_SetColumns(oldcols);
557: }
1.1.1.3 root 558:
1.1 root 559: #else
/* variants for builds without WinUAE CPU core: just tell that there's no data */
void Profile_CpuShowInstrMisses(int show)
{
	fputs("Cache information is recorded only with WinUAE CPU.\n", stderr);
}

void Profile_CpuShowDataHits(int show)
{
	fputs("Cache information is recorded only with WinUAE CPU.\n", stderr);
}
566: #endif
567:
568:
569: /**
570: * compare function for qsort() to sort CPU profile data by cycles counts.
571: */
572: static int cmp_cpu_cycles(const void *p1, const void *p2)
573: {
574: Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].cycles;
575: Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].cycles;
576: if (count1 > count2) {
577: return -1;
578: }
579: if (count1 < count2) {
580: return 1;
581: }
582: return 0;
583: }
584:
585: /**
586: * Sort CPU profile data addresses by cycle counts and show the results.
587: */
588: void Profile_CpuShowCycles(int show)
589: {
590: int active;
591: int oldcols[DISASM_COLUMNS];
592: Uint32 *sort_arr, *end, addr, nextpc;
593: cpu_profile_item_t *data = cpu_profile.data;
594: float percentage;
595: Uint32 count;
596:
597: if (!data) {
598: fprintf(stderr, "ERROR: no CPU profiling data available!\n");
599: return;
600: }
601:
602: active = cpu_profile.active;
603: sort_arr = cpu_profile.sort_arr;
604: qsort(sort_arr, active, sizeof(*sort_arr), cmp_cpu_cycles);
605:
606: leave_instruction_column(oldcols);
607:
608: printf("addr:\t\tcycles:\n");
609: show = (show < active ? show : active);
610: for (end = sort_arr + show; sort_arr < end; sort_arr++) {
611: addr = index2address(*sort_arr);
612: count = data[*sort_arr].cycles;
613: percentage = 100.0*count/cpu_profile.all.cycles;
614: printf("0x%06x\t%5.2f%%\t%d%s\t", addr, percentage, count,
615: count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
616: Disasm(stdout, addr, &nextpc, 1);
617: }
618: printf("%d CPU addresses listed.\n", show);
619:
620: Disasm_SetColumns(oldcols);
621: }
622:
623: /**
624: * compare function for qsort() to sort CPU profile data by descending
625: * address access counts.
626: */
627: static int cmp_cpu_count(const void *p1, const void *p2)
628: {
629: Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].count;
630: Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].count;
631: if (count1 > count2) {
632: return -1;
633: }
634: if (count1 < count2) {
635: return 1;
636: }
637: return 0;
638: }
639:
640: /**
641: * Sort CPU profile data addresses by call counts and show the results.
642: * If symbols are requested and symbols are loaded, show (only) addresses
643: * matching a symbol.
644: */
645: void Profile_CpuShowCounts(int show, bool only_symbols)
646: {
647: cpu_profile_item_t *data = cpu_profile.data;
648: int symbols, matched, active;
649: int oldcols[DISASM_COLUMNS];
650: Uint32 *sort_arr, *end, addr, nextpc;
651: const char *name;
652: float percentage;
653: Uint32 count;
654:
655: if (!data) {
656: fprintf(stderr, "ERROR: no CPU profiling data available!\n");
657: return;
658: }
659: active = cpu_profile.active;
660: show = (show < active ? show : active);
661:
662: sort_arr = cpu_profile.sort_arr;
663: qsort(sort_arr, active, sizeof(*sort_arr), cmp_cpu_count);
664:
665: if (!only_symbols) {
666: leave_instruction_column(oldcols);
667: printf("addr:\t\tcount:\n");
668: for (end = sort_arr + show; sort_arr < end; sort_arr++) {
669: addr = index2address(*sort_arr);
670: count = data[*sort_arr].count;
671: percentage = 100.0*count/cpu_profile.all.count;
672: printf("0x%06x\t%5.2f%%\t%d%s\t",
673: addr, percentage, count,
674: count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
675: Disasm(stdout, addr, &nextpc, 1);
676: }
677: printf("%d CPU addresses listed.\n", show);
678: Disasm_SetColumns(oldcols);
679: return;
680: }
681:
1.1.1.5 ! root 682: symbols = Symbols_CpuCodeCount();
1.1 root 683: if (!symbols) {
684: fprintf(stderr, "ERROR: no CPU symbols loaded!\n");
685: return;
686: }
687: matched = 0;
688:
689: leave_instruction_column(oldcols);
690:
691: printf("addr:\t\tcount:\t\tsymbol:\n");
692: for (end = sort_arr + active; sort_arr < end; sort_arr++) {
693:
694: addr = index2address(*sort_arr);
1.1.1.5 ! root 695: name = Symbols_GetByCpuAddress(addr, SYMTYPE_TEXT);
1.1 root 696: if (!name) {
697: continue;
698: }
699: count = data[*sort_arr].count;
700: percentage = 100.0*count/cpu_profile.all.count;
701: printf("0x%06x\t%5.2f%%\t%d\t%s%s\t",
702: addr, percentage, count, name,
703: count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
704: Disasm(stdout, addr, &nextpc, 1);
705:
706: matched++;
707: if (matched >= show || matched >= symbols) {
708: break;
709: }
710: }
711: printf("%d CPU symbols listed.\n", matched);
712:
713: Disasm_SetColumns(oldcols);
714: }
715:
716:
717: static const char * addr2name(Uint32 addr, Uint64 *total)
718: {
719: Uint32 idx = address2index(addr);
720: *total = cpu_profile.data[idx].count;
1.1.1.5 ! root 721: return Symbols_GetByCpuAddress(addr, SYMTYPE_TEXT);
1.1 root 722: }
723:
724: /**
725: * Output CPU callers info to given file.
726: */
727: void Profile_CpuShowCallers(FILE *fp)
728: {
729: Profile_ShowCallers(fp, cpu_callinfo.sites, cpu_callinfo.site, addr2name);
730: }
731:
732: /**
733: * Save CPU profile information to given file.
734: */
735: void Profile_CpuSave(FILE *out)
736: {
1.1.1.3 root 737: Uint32 text, end;
738: fputs("Field names:\tExecuted instructions, Used cycles, Instruction cache misses, Data cache hits\n", out);
739: /* (Python) regexp that matches address and all described fields from disassembly:
740: * $<hex> : <ASM> <percentage>% (<count>, <cycles>, <i-misses>, <d-hits>)
741: * $e5af38 : rts 0.00% (12, 0, 12, 0)
1.1 root 742: */
743: fputs("Field regexp:\t^\\$([0-9a-f]+) :.*% \\((.*)\\)$\n", out);
744: /* some information for interpreting the addresses */
1.1.1.3 root 745: fprintf(out, "ST_RAM:\t\t0x%06x-0x%06x\n", 0, STRamEnd);
746: end = TosAddress + TosSize;
747: fprintf(out, "ROM_TOS:\t0x%06x-0x%06x\n", TosAddress, end);
748: fprintf(out, "CARTRIDGE:\t0x%06x-0x%06x\n", CART_START, CART_END);
1.1 root 749: text = DebugInfo_GetTEXT();
1.1.1.3 root 750: if (text && (text < TosAddress || text >= TTRAM_START)) {
1.1 root 751: fprintf(out, "PROGRAM_TEXT:\t0x%06x-0x%06x\n", text, DebugInfo_GetTEXTEnd());
752: }
1.1.1.5 ! root 753: if (TTmemory && ConfigureParams.Memory.TTRamSize_KB) {
! 754: end = TTRAM_START + 1024*ConfigureParams.Memory.TTRamSize_KB;
1.1.1.3 root 755: fprintf(out, "TT_RAM:\t\t0x%08x-0x%08x\n", TTRAM_START, end);
756: } else if (end < CART_END) {
757: end = CART_END;
758: }
1.1.1.5 ! root 759: Profile_CpuShowAddresses(0, end-2, out, PAGING_DISABLED);
1.1 root 760: Profile_CpuShowCallers(out);
761: }
762:
763: /* ------------------ CPU profile control ----------------- */
764:
765: /**
766: * Initialize CPU profiling when necessary. Return true if profiling.
767: */
768: bool Profile_CpuStart(void)
769: {
770: int size;
771:
772: Profile_FreeCallinfo(&(cpu_callinfo));
773: if (cpu_profile.sort_arr) {
774: /* remove previous results */
775: free(cpu_profile.sort_arr);
776: free(cpu_profile.data);
777: cpu_profile.sort_arr = NULL;
778: cpu_profile.data = NULL;
779: printf("Freed previous CPU profile buffers.\n");
780: }
781: if (!cpu_profile.enabled) {
782: return false;
783: }
784: /* zero everything */
785: memset(&cpu_profile, 0, sizeof(cpu_profile));
786:
787: /* Shouldn't change within same debug session */
1.1.1.3 root 788: size = (STRamEnd + CART_SIZE + TosSize) / 2;
1.1.1.5 ! root 789: if (TTmemory && ConfigureParams.Memory.TTRamSize_KB) {
! 790: size += ConfigureParams.Memory.TTRamSize_KB * 1024/2;
1.1.1.3 root 791: }
1.1 root 792:
793: /* Add one entry for catching invalid PC values */
794: cpu_profile.data = calloc(size + 1, sizeof(*cpu_profile.data));
795: if (!cpu_profile.data) {
796: perror("ERROR, new CPU profile buffer alloc failed");
797: return false;
798: }
799: printf("Allocated CPU profile buffer (%d MB).\n",
800: (int)sizeof(*cpu_profile.data)*size/(1024*1024));
801: cpu_profile.size = size;
802:
1.1.1.5 ! root 803: Profile_AllocCallinfo(&(cpu_callinfo), Symbols_CpuCodeCount(), "CPU");
1.1 root 804:
805: /* special hack for EmuTOS */
806: etos_switcher = PC_UNDEFINED;
807: if (cpu_callinfo.sites && bIsEmuTOS &&
808: (!Symbols_GetCpuAddress(SYMTYPE_TEXT, "_switchto", &etos_switcher) || etos_switcher < TosAddress)) {
809: etos_switcher = PC_UNDEFINED;
810: }
811:
1.1.1.3 root 812: /* reset cache stats (CPU emulation doesn't do that) */
813: CpuInstruction.D_Cache_hit = 0;
814: CpuInstruction.I_Cache_hit = 0;
815: CpuInstruction.I_Cache_miss = 0;
816: CpuInstruction.D_Cache_miss = 0;
1.1 root 817:
1.1.1.3 root 818: cpu_profile.prev_cycles = CyclesGlobalClockCounter;
819: cpu_profile.prev_family = OpcodeFamily;
820: cpu_profile.prev_pc = M68000_GetPC();
821: if (ConfigureParams.System.bAddressSpace24) {
822: cpu_profile.prev_pc &= 0xffffff;
823: }
1.1.1.2 root 824: cpu_profile.loop_start = PC_UNDEFINED;
825: cpu_profile.loop_end = PC_UNDEFINED;
826: cpu_profile.loop_count = 0;
827: Profile_LoopReset();
828:
1.1 root 829: cpu_profile.disasm_addr = 0;
830: cpu_profile.processed = false;
831: cpu_profile.enabled = true;
832: return cpu_profile.enabled;
833: }
834:
/**
 * Return true if pc could be address of the instruction following
 * the one at prev_pc, i.e. if it's 1-10 bytes after prev_pc.
 */
static bool is_prev_instr(Uint32 prev_pc, Uint32 pc)
{
	/* moved forward, and at most by 10 bytes? */
	return (prev_pc < pc) && ((pc - prev_pc) <= 10);
}
846:
847: /**
848: * return caller instruction type classification
849: */
850: static calltype_t cpu_opcode_type(int family, Uint32 prev_pc, Uint32 pc)
851: {
852: switch (family) {
853:
854: case i_JSR:
855: case i_BSR:
856: return CALL_SUBROUTINE;
857:
858: case i_RTS:
859: case i_RTR:
860: case i_RTD:
861: return CALL_SUBRETURN;
862:
863: case i_JMP: /* often used also for "inlined" function calls... */
864: case i_Bcc: /* both BRA & BCC */
865: case i_FBcc:
866: case i_DBcc:
867: case i_FDBcc:
868: return CALL_BRANCH;
869:
870: case i_TRAP:
871: case i_TRAPV:
872: case i_TRAPcc:
873: case i_FTRAPcc:
874: case i_STOP:
875: case i_ILLG:
876: case i_CHK:
877: case i_CHK2:
878: case i_BKPT:
879: return CALL_EXCEPTION;
880:
881: case i_RTE:
882: return CALL_EXCRETURN;
883: }
884: /* just moved to next instruction? */
885: if (is_prev_instr(prev_pc, pc)) {
886: return CALL_NEXT;
887: }
888: return CALL_UNKNOWN;
889: }
890:
891: /**
892: * If call tracking is enabled (there are symbols), collect
893: * information about subroutine and other calls, and their costs.
894: *
895: * Like with profile data, caller info checks need to be for previous
896: * instruction, that's why "pc" argument for this function actually
897: * needs to be previous PC.
898: */
899: static void collect_calls(Uint32 pc, counters_t *counters)
900: {
901: calltype_t flag;
902: int idx, family;
903: Uint32 prev_pc, caller_pc;
904:
905: family = cpu_profile.prev_family;
906: cpu_profile.prev_family = OpcodeFamily;
907:
908: prev_pc = cpu_callinfo.prev_pc;
909: cpu_callinfo.prev_pc = pc;
910: caller_pc = PC_UNDEFINED;
911:
912: /* address is return address for last subroutine call? */
913: if (unlikely(pc == cpu_callinfo.return_pc) && likely(cpu_callinfo.depth)) {
914:
915: flag = cpu_opcode_type(family, prev_pc, pc);
1.1.1.2 root 916: /* previous address can be exception return (e.g. RTE) instead of RTS,
917: * if exception occurred right after returning from subroutine call.
1.1 root 918: */
919: if (likely(flag == CALL_SUBRETURN || flag == CALL_EXCRETURN)) {
920: caller_pc = Profile_CallEnd(&cpu_callinfo, counters);
921: } else {
922: #if DEBUG
923: /* although at return address, it didn't return yet,
924: * e.g. because there was a jsr or jump to return address
925: */
926: Uint32 nextpc;
1.1.1.2 root 927: fprintf(stderr, "WARNING: subroutine call returned 0x%x -> 0x%x, not through RTS!\n", prev_pc, pc);
1.1 root 928: Disasm(stderr, prev_pc, &nextpc, 1);
929: #endif
930: }
1.1.1.2 root 931: /* next address might be another symbol, so need to fall through */
1.1 root 932: }
933:
934: /* address is one which we're tracking? */
1.1.1.5 ! root 935: idx = Symbols_GetCpuCodeIndex(pc);
1.1 root 936: if (unlikely(idx >= 0)) {
937:
938: flag = cpu_opcode_type(family, prev_pc, pc);
1.1.1.2 root 939: if (flag == CALL_SUBROUTINE || flag == CALL_EXCEPTION) {
1.1 root 940: /* special HACK for for EmuTOS AES switcher which
941: * changes stack content to remove itself from call
942: * stack and uses RTS for subroutine *calls*, not
943: * for returning from them.
944: *
945: * It wouldn't be reliable to detect calls from it,
946: * so I'm making call *to* it show up as branch, to
947: * keep callstack depth correct.
948: */
949: if (unlikely(pc == etos_switcher)) {
950: flag = CALL_BRANCH;
951: } else if (unlikely(prev_pc == PC_UNDEFINED)) {
952: /* if first profiled instruction
953: * is subroutine call, it doesn't have
954: * valid prev_pc value stored
955: */
956: cpu_callinfo.return_pc = PC_UNDEFINED;
1.1.1.3 root 957: fprintf(stderr, "WARNING: previous PC for tracked address 0x%d is undefined!\n", pc);
1.1 root 958: #if DEBUG
959: skip_assert = true;
960: DebugUI(REASON_CPU_EXCEPTION);
961: #endif
962: } else {
963: /* slow! */
964: cpu_callinfo.return_pc = Disasm_GetNextPC(prev_pc);
965: }
966: } else if (caller_pc != PC_UNDEFINED) {
1.1.1.2 root 967: /* returned from function to first instruction of another symbol:
1.1 root 968: * 0xf384 jsr some_function
969: * other_symbol:
970: * 0f3x8a some_instruction
971: * -> change return instruction address to
972: * address of what did the returned call.
973: */
974: prev_pc = caller_pc;
975: assert(is_prev_instr(prev_pc, pc));
976: flag = CALL_NEXT;
977: }
978: Profile_CallStart(idx, &cpu_callinfo, prev_pc, flag, pc, counters);
979: }
980: }
981:
982: /**
1.1.1.2 root 983: * log last loop info, if there's suitable data for one
984: */
985: static void log_last_loop(void)
986: {
987: unsigned len = cpu_profile.loop_end - cpu_profile.loop_start;
988: if (cpu_profile.loop_count > 1 && (len < profile_loop.cpu_limit || !profile_loop.cpu_limit)) {
989: fprintf(profile_loop.fp, "CPU %d 0x%06x %d %d\n", nVBLs,
990: cpu_profile.loop_start, len, cpu_profile.loop_count);
991: }
992: }
993:
# if DEBUG || ENABLE_WINUAE_CPU
/**
 * Report a value that is at or above its expected limit.
 *
 * Prints a warning to stderr and calls Disasm() for both the previous
 * and the current PC address. With DEBUG enabled, also marks the
 * consistency asserts to be skipped and invokes the debugger.
 *
 * @param name     description of the value, used in the warning
 * @param value    the offending value
 * @param limit    first value that is out of range
 * @param prev_pc  address of the previous instruction
 * @param pc       address of the current instruction
 * @return largest allowed value (limit - 1), which caller can use
 *         in place of the offending one
 */
static Uint32 warn_too_large(const char *name, const int value, const int limit, const Uint32 prev_pc, const Uint32 pc)
{
	const int highest_ok = limit - 1;
	Uint32 next_addr;

	fprintf(stderr, "WARNING: unexpected (%d > %d) %s at 0x%x:\n", value, highest_ok, name, pc);
	Disasm(stderr, prev_pc, &next_addr, 1);
	Disasm(stderr, pc, &next_addr, 1);
#if DEBUG
	skip_assert = true;
	DebugUI(REASON_CPU_EXCEPTION);
#endif
	return highest_ok;
}
#endif
1011:
1.1.1.2 root 1012: /**
1.1 root 1013: * Update CPU cycle and count statistics for PC address.
1014: *
1015: * This gets called after instruction has executed and PC
1016: * has advanced to next instruction.
1017: */
1018: void Profile_CpuUpdate(void)
1019: {
1020: counters_t *counters = &(cpu_profile.all);
1.1.1.3 root 1021: Uint32 pc, prev_pc, idx, cycles;
1.1 root 1022: cpu_profile_item_t *prev;
1.1.1.3 root 1023: #if ENABLE_WINUAE_CPU
1024: Uint32 i_hits, d_hits, i_misses, d_misses;
1025: #else
1026: const Uint32 i_misses = 0, d_hits = 0;
1027: #endif
1.1 root 1028:
1029: prev_pc = cpu_profile.prev_pc;
1.1.1.3 root 1030: /* PC may have extra bits when using 24 bit addressing, they need to be masked away as
1.1 root 1031: * emulation itself does that too when PC value is used
1032: */
1.1.1.3 root 1033: cpu_profile.prev_pc = pc = M68000_GetPC();
1034: if (ConfigureParams.System.bAddressSpace24) {
1035: cpu_profile.prev_pc &= 0xffffff;
1036: }
1.1.1.2 root 1037: if (unlikely(profile_loop.fp)) {
1038: if (pc < prev_pc) {
1039: if (pc == cpu_profile.loop_start && prev_pc == cpu_profile.loop_end) {
1040: cpu_profile.loop_count++;
1041: } else {
1042: cpu_profile.loop_start = pc;
1043: cpu_profile.loop_end = prev_pc;
1044: cpu_profile.loop_count = 1;
1045: }
1046: } else {
1047: if (pc > cpu_profile.loop_end) {
1048: log_last_loop();
1.1.1.3 root 1049: cpu_profile.loop_end = 0xffffffff;
1.1.1.2 root 1050: cpu_profile.loop_count = 0;
1051: }
1052: }
1053: }
1054:
1.1 root 1055: idx = address2index(prev_pc);
1056: assert(idx <= cpu_profile.size);
1057: prev = cpu_profile.data + idx;
1058:
1059: if (likely(prev->count < MAX_CPU_PROFILE_VALUE)) {
1060: prev->count++;
1061: }
1062:
1.1.1.3 root 1063: cycles = CyclesGlobalClockCounter - cpu_profile.prev_cycles;
1064: cpu_profile.prev_cycles = CyclesGlobalClockCounter;
1.1 root 1065:
1066: if (likely(prev->cycles < MAX_CPU_PROFILE_VALUE - cycles)) {
1067: prev->cycles += cycles;
1068: } else {
1069: prev->cycles = MAX_CPU_PROFILE_VALUE;
1070: }
1071:
1072: #if ENABLE_WINUAE_CPU
1.1.1.3 root 1073: /* only WinUAE CPU core provides cache information */
1074: i_hits = CpuInstruction.I_Cache_hit;
1075: d_hits = CpuInstruction.D_Cache_hit;
1076: i_misses = CpuInstruction.I_Cache_miss;
1077: d_misses = CpuInstruction.D_Cache_miss;
1078:
1079: /* reset cache stats after reading them (for the next instruction) */
1080: CpuInstruction.I_Cache_hit = 0;
1081: CpuInstruction.D_Cache_hit = 0;
1082: CpuInstruction.I_Cache_miss = 0;
1083: CpuInstruction.D_Cache_miss = 0;
1084:
1085: /* tracked for every address */
1.1.1.5 ! root 1086: # if DEBUG_CACHE
! 1087: if (likely(prev->i_hits < MAX_CPU_PROFILE_VALUE - i_hits)) {
! 1088: prev->i_hits += i_hits;
! 1089: } else {
! 1090: prev->i_hits = MAX_CPU_PROFILE_VALUE;
! 1091: }
! 1092: if (likely(prev->d_misses < MAX_CPU_PROFILE_VALUE - d_misses)) {
! 1093: prev->d_misses += d_misses;
! 1094: } else {
! 1095: prev->d_misses = MAX_CPU_PROFILE_VALUE;
! 1096: }
! 1097: # endif
1.1.1.3 root 1098: if (likely(prev->i_misses < MAX_CPU_PROFILE_VALUE - i_misses)) {
1099: prev->i_misses += i_misses;
1.1 root 1100: } else {
1.1.1.3 root 1101: prev->i_misses = MAX_CPU_PROFILE_VALUE;
1.1 root 1102: }
1.1.1.3 root 1103: if (likely(prev->d_hits < MAX_CPU_PROFILE_VALUE - d_hits)) {
1104: prev->d_hits += d_hits;
1105: } else {
1106: prev->d_hits = MAX_CPU_PROFILE_VALUE;
1107: }
1108:
1109: /* tracking for histogram, check for array overflows */
1.1.1.5 ! root 1110: if (!(i_hits || i_misses)) {
! 1111: cpu_profile.i_prefetched++;
! 1112: }
1.1.1.3 root 1113: if (unlikely(i_hits >= MAX_I_HITS)) {
1114: i_hits = warn_too_large("number of CPU instruction cache hits", i_hits, MAX_I_HITS, prev_pc, pc);
1115: }
1116: cpu_profile.i_hit_counts[i_hits]++;
1117:
1118: if (unlikely(i_misses >= MAX_I_MISSES)) {
1119: i_misses = warn_too_large("number of CPU instruction cache misses", i_misses, MAX_I_MISSES, prev_pc, pc);
1120: }
1121: cpu_profile.i_miss_counts[i_misses]++;
1122:
1123: if (unlikely(d_hits >= MAX_D_HITS)) {
1124: d_hits = warn_too_large("number of CPU data cache hits", d_hits, MAX_D_HITS, prev_pc, pc);
1125: }
1126: cpu_profile.d_hit_counts[d_hits]++;
1127:
1128: if (unlikely(d_misses >= MAX_D_MISSES)) {
1129: d_misses = warn_too_large("number of CPU data cache misses", d_misses, MAX_D_MISSES, prev_pc, pc);
1130: }
1131: cpu_profile.d_miss_counts[d_misses]++;
1.1.1.5 ! root 1132: #endif /* ENABLE_WINUAE_CPU */
1.1.1.3 root 1133:
1.1 root 1134: if (cpu_callinfo.sites) {
1135: collect_calls(prev_pc, counters);
1136: }
1.1.1.5 ! root 1137: /* total counters are increased after caller info is processed,
1.1 root 1138: * otherwise cost for the instruction calling the callee
1139: * doesn't get accounted to caller (but callee).
1140: */
1141: counters->count++;
1.1.1.3 root 1142: counters->cycles += cycles;
1143: counters->i_misses += i_misses;
1144: counters->d_hits += d_hits;
1.1 root 1145:
1146: #if DEBUG
1147: if (unlikely(OpcodeFamily == 0)) {
1148: Uint32 nextpc;
1149: fputs("WARNING: instruction opcode family is zero (=i_ILLG) for instruction:\n", stderr);
1150: Disasm(stderr, prev_pc, &nextpc, 1);
1151: }
1152: /* catch too large (and negative) cycles for other than STOP instruction */
1153: if (unlikely(cycles > 512 && OpcodeFamily != i_STOP)) {
1.1.1.3 root 1154: warn_too_large("cycles", cycles, 512, prev_pc, pc);
1.1 root 1155: }
1.1.1.3 root 1156: # if !ENABLE_WINUAE_CPU
1157: {
1158: static Uint32 prev_cycles = 0, prev_pc2 = 0;
1159: if (unlikely(cycles == 0 && prev_cycles == 0)) {
1160: Uint32 nextpc;
1161: fputs("WARNING: Zero cycles for successive opcodes:\n", stderr);
1162: Disasm(stderr, prev_pc2, &nextpc, 1);
1163: Disasm(stderr, prev_pc, &nextpc, 1);
1164: }
1165: prev_cycles = cycles;
1166: prev_pc2 = prev_pc;
1.1 root 1167: }
1.1.1.3 root 1168: # endif
1.1 root 1169: #endif
1170: }
1171:
1172:
1173: /**
1174: * Helper for accounting CPU profile area item.
1175: */
1176: static void update_area_item(profile_area_t *area, Uint32 addr, cpu_profile_item_t *item)
1177: {
1178: Uint32 cycles = item->cycles;
1179: Uint32 count = item->count;
1180:
1181: if (!count) {
1182: return;
1183: }
1184: area->counters.count += count;
1185: area->counters.cycles += cycles;
1.1.1.3 root 1186: area->counters.i_misses += item->i_misses;
1187: area->counters.d_hits += item->d_hits;
1.1 root 1188:
1189: if (cycles == MAX_CPU_PROFILE_VALUE) {
1190: area->overflow = true;
1191: }
1192: if (addr < area->lowest) {
1193: area->lowest = addr;
1194: }
1195: area->highest = addr;
1196:
1197: area->active++;
1198: }
1199:
1200: /**
1201: * Helper for collecting CPU profile area statistics.
1202: */
1203: static Uint32 update_area(profile_area_t *area, Uint32 start, Uint32 end)
1204: {
1205: cpu_profile_item_t *item;
1206: Uint32 addr;
1207:
1208: memset(area, 0, sizeof(profile_area_t));
1209: area->lowest = cpu_profile.size;
1210:
1211: item = &(cpu_profile.data[start]);
1212: for (addr = start; addr < end; addr++, item++) {
1213: update_area_item(area, addr, item);
1214: }
1215: return addr;
1216: }
1217:
1218: /**
1219: * Helper for initializing CPU profile area sorting indexes.
1220: */
1221: static Uint32* index_area(profile_area_t *area, Uint32 *sort_arr)
1222: {
1223: cpu_profile_item_t *item;
1224: Uint32 addr;
1225:
1226: item = &(cpu_profile.data[area->lowest]);
1227: for (addr = area->lowest; addr <= area->highest; addr++, item++) {
1228: if (item->count) {
1229: *sort_arr++ = addr;
1230: }
1231: }
1232: return sort_arr;
1233: }
1234:
1235: /**
1236: * Stop and process the CPU profiling data; collect stats and
1237: * prepare for more optimal sorting.
1238: */
1239: void Profile_CpuStop(void)
1240: {
1241: Uint32 *sort_arr, next;
1.1.1.3 root 1242: unsigned int size, stsize;
1.1 root 1243: int active;
1244:
1245: if (cpu_profile.processed || !cpu_profile.enabled) {
1246: return;
1247: }
1.1.1.2 root 1248:
1249: log_last_loop();
1250: if (profile_loop.fp) {
1251: fflush(profile_loop.fp);
1252: }
1253:
1.1 root 1254: /* user didn't change RAM or TOS size in the meanwhile? */
1.1.1.3 root 1255: size = stsize = (STRamEnd + CART_SIZE + TosSize) / 2;
1.1.1.5 ! root 1256: if (TTmemory && ConfigureParams.Memory.TTRamSize_KB) {
! 1257: size += ConfigureParams.Memory.TTRamSize_KB * 1024/2;
1.1.1.3 root 1258: }
1259: assert(cpu_profile.size == size);
1.1 root 1260:
1261: Profile_FinalizeCalls(&(cpu_callinfo), &(cpu_profile.all), Symbols_GetByCpuAddress);
1262:
1263: /* find lowest and highest addresses executed etc */
1264: next = update_area(&cpu_profile.ram, 0, STRamEnd/2);
1.1.1.3 root 1265: if (TosAddress < CART_START) {
1266: next = update_area(&cpu_profile.tos, next, (STRamEnd + TosSize)/2);
1267: next = update_area(&cpu_profile.rom, next, stsize);
1268: } else {
1269: next = update_area(&cpu_profile.rom, next, (STRamEnd + CART_SIZE)/2);
1270: next = update_area(&cpu_profile.tos, next, stsize);
1271: }
1272: next = update_area(&cpu_profile.ttram, next, size);
1273: assert(next == size);
1.1 root 1274:
1275: #if DEBUG
1276: if (skip_assert) {
1277: skip_assert = false;
1278: } else
1279: #endif
1280: {
1.1.1.3 root 1281: #if DEBUG
1282: if (cpu_profile.all.count != cpu_profile.ttram.counters.count + cpu_profile.ram.counters.count + cpu_profile.tos.counters.count + cpu_profile.rom.counters.count) {
1283: fprintf(stderr, "ERROR, instruction count mismatch:\n\t%"PRIu64" != %"PRIu64" + %"PRIu64" + %"PRIu64" + %"PRIu64"?\n",
1284: cpu_profile.all.count, cpu_profile.ttram.counters.count, cpu_profile.ram.counters.count,
1285: cpu_profile.tos.counters.count, cpu_profile.rom.counters.count);
1286: fprintf(stderr, "If there was debugger invocation from profiling before this, try with profiler DEBUG define disabled!!!\n");
1287: }
1288: #endif
1289: assert(cpu_profile.all.count == cpu_profile.ttram.counters.count + cpu_profile.ram.counters.count + cpu_profile.tos.counters.count + cpu_profile.rom.counters.count);
1290: assert(cpu_profile.all.cycles == cpu_profile.ttram.counters.cycles + cpu_profile.ram.counters.cycles + cpu_profile.tos.counters.cycles + cpu_profile.rom.counters.cycles);
1291: assert(cpu_profile.all.i_misses == cpu_profile.ttram.counters.i_misses + cpu_profile.ram.counters.i_misses + cpu_profile.tos.counters.i_misses + cpu_profile.rom.counters.i_misses);
1292: assert(cpu_profile.all.d_hits == cpu_profile.ttram.counters.d_hits + cpu_profile.ram.counters.d_hits + cpu_profile.tos.counters.d_hits + cpu_profile.rom.counters.d_hits);
1.1 root 1293: }
1294:
1295: /* allocate address array for sorting */
1.1.1.3 root 1296: active = cpu_profile.ttram.active + cpu_profile.ram.active + cpu_profile.rom.active + cpu_profile.tos.active;
1.1 root 1297: sort_arr = calloc(active, sizeof(*sort_arr));
1298:
1299: if (!sort_arr) {
1300: perror("ERROR: allocating CPU profile address data");
1301: free(cpu_profile.data);
1302: cpu_profile.data = NULL;
1303: return;
1304: }
1305: printf("Allocated CPU profile address buffer (%d KB).\n",
1306: (int)sizeof(*sort_arr)*(active+512)/1024);
1307: cpu_profile.sort_arr = sort_arr;
1308: cpu_profile.active = active;
1309:
1310: /* and fill addresses for used instructions... */
1311: sort_arr = index_area(&cpu_profile.ram, sort_arr);
1312: sort_arr = index_area(&cpu_profile.tos, sort_arr);
1313: sort_arr = index_area(&cpu_profile.rom, sort_arr);
1.1.1.3 root 1314: sort_arr = index_area(&cpu_profile.ttram, sort_arr);
1.1 root 1315: assert(sort_arr == cpu_profile.sort_arr + cpu_profile.active);
1316: //printf("%d/%d/%d\n", area->active, sort_arr-cpu_profile.sort_arr, active);
1317:
1318: Profile_CpuShowStats();
1319: cpu_profile.processed = true;
1320: }
1321:
1322: /**
1323: * Get pointers to CPU profile enabling and disasm address variables
1324: * for updating them (in parser).
1325: */
1326: void Profile_CpuGetPointers(bool **enabled, Uint32 **disasm_addr)
1327: {
1328: *disasm_addr = &cpu_profile.disasm_addr;
1329: *enabled = &cpu_profile.enabled;
1330: }
1331:
1332: /**
1333: * Get callinfo & symbol search pointers for stack walking.
1334: */
1.1.1.5 ! root 1335: void Profile_CpuGetCallinfo(callinfo_t **callinfo, const char* (**get_symbol)(Uint32, symtype_t))
1.1 root 1336: {
1337: *callinfo = &(cpu_callinfo);
1338: *get_symbol = Symbols_GetByCpuAddress;
1339: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.