|
|
1.1 root 1: /*
2: * Hatari - profilecpu.c
3: *
1.1.1.3 root 4: * Copyright (C) 2010-2015 by Eero Tamminen
1.1 root 5: *
6: * This file is distributed under the GNU General Public License, version 2
7: * or at your option any later version. Read the file gpl.txt for details.
8: *
9: * profilecpu.c - functions for profiling CPU and showing the results.
10: */
11: const char Profilecpu_fileid[] = "Hatari profilecpu.c : " __DATE__ " " __TIME__;
12:
13: #include <stdio.h>
14: #include <inttypes.h>
15: #include <assert.h>
16: #include "main.h"
17: #include "configuration.h"
18: #include "clocks_timings.h"
19: #include "debugInfo.h"
20: #include "dsp.h"
21: #include "m68000.h"
22: #include "68kDisass.h"
1.1.1.5 root 23: #include "symbols.h"
1.1 root 24: #include "profile.h"
25: #include "profile_priv.h"
1.1.1.5 root 26: #include "debug_priv.h"
1.1 root 27: #include "stMemory.h"
28: #include "tos.h"
1.1.1.2 root 29: #include "screen.h"
30: #include "video.h"
31:
32:
33: /* cartridge area */
34: #define CART_START 0xFA0000
35: #define CART_END 0xFC0000
36: #define CART_SIZE (CART_END - CART_START)
37:
1.1.1.3 root 38: #define TTRAM_START 0x01000000
1.1 root 39:
40: /* if non-zero, output (more) warnings on suspicious:
41: * - cycle/instruction counts
42: * - PC switches
1.1.1.3 root 43: * And drop to debugger on invalid current & previous PC addresses.
44: *
45: * NOTE: DebugUI() calls that DEBUG define enables, can cause
46: * instruction count mismatch assertions because debugger invocation
47: * resets the counters AND happens in middle of data collection.
48: * It's best to quit after debugging the issue ('q' command).
1.1 root 49: */
50: #define DEBUG 0
51: #if DEBUG
52: #include "debugui.h"
53: static bool skip_assert;
54: #endif
55:
1.1.1.5 root 56: /* whether to track & show all cache stats for all instructions */
57: #define DEBUG_CACHE 0
58:
59:
1.1 root 60: static callinfo_t cpu_callinfo;
61:
62: #define MAX_CPU_PROFILE_VALUE 0xFFFFFFFF
63:
64: typedef struct {
1.1.1.5 root 65: Uint32 count; /* how many times this address instruction is executed */
1.1 root 66: Uint32 cycles; /* how many CPU cycles was taken at this address */
1.1.1.5 root 67: #if DEBUG_CACHE /* track also less relevant cache events */
68: Uint32 i_hits; /* how many CPU i-cache hits happened at this address */
69: Uint32 d_misses; /* how many CPU d-cache misses happened at this address */
70: #endif
71: Uint32 i_misses; /* how many CPU i-cache misses happened at this address */
72: Uint32 d_hits; /* how many CPU d-cache hits happened at this address */
1.1 root 73: } cpu_profile_item_t;
74:
1.1.1.5 root 75:
76: /* max count of hits/misses single instruction can trigger at once */
1.1.1.3 root 77: #define MAX_I_HITS 8
78: #define MAX_I_MISSES 8
79: #define MAX_D_HITS 32
80: #define MAX_D_MISSES 20
1.1 root 81:
82: static struct {
83: counters_t all; /* total counts for all areas */
84: cpu_profile_item_t *data; /* profile data items */
85: Uint32 size; /* number of allocated profile data items */
1.1.1.3 root 86: profile_area_t ttram; /* TT-RAM stats */
1.1 root 87: profile_area_t ram; /* normal RAM stats */
88: profile_area_t rom; /* cartridge ROM stats */
89: profile_area_t tos; /* ROM TOS stats */
90: int active; /* number of active data items in all areas */
91: Uint32 *sort_arr; /* data indexes used for sorting */
1.1.1.2 root 92: int prev_family; /* previous instruction opcode family */
1.1.1.3 root 93: Uint64 prev_cycles; /* previous instruction cycles counter */
1.1 root 94: Uint32 prev_pc; /* previous instruction address */
1.1.1.2 root 95: Uint32 loop_start; /* address of last loop start */
96: Uint32 loop_end; /* address of last loop end */
97: Uint32 loop_count; /* how many times it was looped */
1.1 root 98: Uint32 disasm_addr; /* 'addresses' command start address */
1.1.1.3 root 99: #if ENABLE_WINUAE_CPU
1.1.1.5 root 100: Uint32 i_prefetched; /* instructions that don't incur prefetch hit/miss */
1.1.1.3 root 101: Uint32 i_hit_counts[MAX_I_HITS]; /* I-cache hit counts */
102: Uint32 d_hit_counts[MAX_D_HITS]; /* D-cache hit counts */
103: Uint32 i_miss_counts[MAX_I_MISSES]; /* I-cache miss counts */
104: Uint32 d_miss_counts[MAX_D_MISSES]; /* D-cache miss counts */
105: #endif
1.1 root 106: bool processed; /* true when data is already processed */
107: bool enabled; /* true when profiling enabled */
108: } cpu_profile;
109:
110: /* special hack for EmuTOS */
111: static Uint32 etos_switcher;
112:
113:
114: /* ------------------ CPU profile address mapping ----------------- */
115:
116: /**
117: * convert Atari memory address to sorting array profile data index.
118: */
119: static inline Uint32 address2index(Uint32 pc)
120: {
121: if (unlikely(pc & 1)) {
122: fprintf(stderr, "WARNING: odd CPU profile instruction address 0x%x!\n", pc);
123: #if DEBUG
124: skip_assert = true;
125: DebugUI(REASON_CPU_EXCEPTION);
126: #endif
127: }
1.1.1.2 root 128: if (pc < STRamEnd) {
129: /* most likely case, use RAM address as-is */
130:
131: } else if (pc >= TosAddress && pc < TosAddress + TosSize) {
1.1 root 132: /* TOS, put it after RAM data */
133: pc = pc - TosAddress + STRamEnd;
1.1.1.2 root 134: if (TosAddress >= CART_END) {
135: /* and after cartridge data as it's higher */
136: pc += CART_SIZE;
137: }
138: } else if (pc >= CART_START && pc < CART_END) {
139: /* ROM, put it after RAM data */
140: pc = pc - CART_START + STRamEnd;
141: if (TosAddress < CART_START) {
142: /* and after TOS as it's higher */
143: pc += TosSize;
144: }
1.1.1.3 root 145: #if ENABLE_WINUAE_CPU
1.1.1.5 root 146: } else if (TTmemory && pc >= TTRAM_START && pc < TTRAM_START + 1024*(unsigned)ConfigureParams.Memory.TTRamSize_KB) {
1.1.1.3 root 147: pc += STRamEnd + TosSize + CART_SIZE - TTRAM_START;
148: #endif
1.1 root 149: } else {
1.1.1.2 root 150: fprintf(stderr, "WARNING: 'invalid' CPU PC profile instruction address 0x%x!\n", pc);
151: /* extra entry at end is reserved for invalid PC values */
1.1.1.3 root 152: pc = STRamEnd + TosSize + CART_SIZE;
1.1 root 153: #if DEBUG
1.1.1.2 root 154: skip_assert = true;
155: DebugUI(REASON_CPU_EXCEPTION);
1.1 root 156: #endif
157: }
158: /* CPU instructions are at even addresses, save space by halving */
159: return (pc >> 1);
160: }
161:
162: /**
163: * convert sorting array profile data index to Atari memory address.
164: */
165: static Uint32 index2address(Uint32 idx)
166: {
167: idx <<= 1;
168: /* RAM */
169: if (idx < STRamEnd) {
170: return idx;
171: }
172: idx -= STRamEnd;
1.1.1.2 root 173: /* TOS before cartridge area? */
174: if (TosAddress < CART_START) {
175: /* TOS */
176: if (idx < TosSize) {
177: return idx + TosAddress;
178: }
179: idx -= TosSize;
180: /* ROM */
1.1.1.3 root 181: if (idx < CART_SIZE) {
182: return idx + CART_START;
183: }
184: idx -= CART_SIZE;
1.1.1.2 root 185: } else {
186: /* ROM */
187: if (idx < CART_SIZE) {
188: return idx + CART_START;
189: }
190: idx -= CART_SIZE;
191: /* TOS */
1.1.1.3 root 192: if (idx < TosSize) {
193: return idx + TosAddress;
194: }
195: idx -= TosSize;
1.1 root 196: }
1.1.1.3 root 197: return idx + TTRAM_START;
1.1 root 198: }
199:
200: /* ------------------ CPU profile results ----------------- */
201:
202: /**
1.1.1.5 root 203: * Write string containing CPU cache stats, cycles, count, count percentage
204: * for given address to provided buffer.
205: *
1.1 root 206: * Return true if data was available and non-zero, false otherwise.
207: */
1.1.1.5 root 208: bool Profile_CpuAddressDataStr(char *buffer, size_t maxlen, Uint32 addr)
1.1 root 209: {
1.1.1.5 root 210: cpu_profile_item_t *item;
211: float percentage;
1.1 root 212: Uint32 idx;
1.1.1.5 root 213:
214: assert(buffer && maxlen);
1.1 root 215: if (!cpu_profile.data) {
216: return false;
217: }
218: idx = address2index(addr);
1.1.1.5 root 219: item = &(cpu_profile.data[idx]);
220:
1.1 root 221: if (cpu_profile.all.count) {
1.1.1.5 root 222: percentage = 100.0 * item->count / cpu_profile.all.count;
1.1 root 223: } else {
1.1.1.5 root 224: percentage = 0.0;
1.1 root 225: }
1.1.1.5 root 226: #if DEBUG_CACHE
227: snprintf(buffer, maxlen, "%5.2f%% (%u, %u, %u, %u, %u, %u)",
228: percentage, item->count, item->cycles,
229: item->i_hits, item->i_misses,
230: item->d_hits, item->d_misses);
231: #else
232: snprintf(buffer, maxlen, "%5.2f%% (%u, %u, %u, %u)",
233: percentage, item->count, item->cycles,
234: item->i_misses, item->d_hits);
235: #endif
236: return (item->count > 0);
1.1 root 237: }
238:
239: /**
240: * Helper to show statistics for specified CPU profile area.
241: */
242: static void show_cpu_area_stats(profile_area_t *area)
243: {
244: if (!area->active) {
245: fprintf(stderr, "- no activity\n");
246: return;
247: }
248: fprintf(stderr, "- active address range:\n 0x%06x-0x%06x\n",
249: index2address(area->lowest),
250: index2address(area->highest));
1.1.1.5 root 251: fprintf(stderr, "- active instruction addresses:\n %d (%.2f%% of all areas)\n",
1.1 root 252: area->active,
253: 100.0 * area->active / cpu_profile.active);
1.1.1.5 root 254: fprintf(stderr, "- executed instructions:\n %"PRIu64" (%.2f%% of all areas)\n",
1.1 root 255: area->counters.count,
256: 100.0 * area->counters.count / cpu_profile.all.count);
1.1.1.3 root 257: /* CPU cache in use? */
258: if (cpu_profile.all.i_misses) {
1.1.1.5 root 259: fprintf(stderr, "- instruction cache misses:\n %"PRIu64" (%.2f%% of all areas)\n",
1.1.1.3 root 260: area->counters.i_misses,
261: 100.0 * area->counters.i_misses / cpu_profile.all.i_misses);
262: }
263: if (cpu_profile.all.d_hits) {
1.1.1.5 root 264: fprintf(stderr, "- data cache hits:\n %"PRIu64" (%.2f%% of all areas)\n",
1.1.1.3 root 265: area->counters.d_hits,
266: 100.0 * area->counters.d_hits / cpu_profile.all.d_hits);
1.1 root 267: }
1.1.1.5 root 268: fprintf(stderr, "- used cycles:\n %"PRIu64" (%.2f%% of all areas)\n = %.5fs\n",
1.1 root 269: area->counters.cycles,
270: 100.0 * area->counters.cycles / cpu_profile.all.cycles,
1.1.1.5 root 271: (double)area->counters.cycles / MachineClocks.CPU_Freq_Emul);
1.1 root 272: if (area->overflow) {
273: fprintf(stderr, " *** COUNTER OVERFLOW! ***\n");
274: }
275: }
276:
277:
278: /**
279: * show CPU area (RAM, ROM, TOS) specific statistics.
280: */
281: void Profile_CpuShowStats(void)
282: {
283: fprintf(stderr, "Normal RAM (0-0x%X):\n", STRamEnd);
284: show_cpu_area_stats(&cpu_profile.ram);
285:
286: fprintf(stderr, "ROM TOS (0x%X-0x%X):\n", TosAddress, TosAddress + TosSize);
287: show_cpu_area_stats(&cpu_profile.tos);
288:
1.1.1.2 root 289: fprintf(stderr, "Cartridge ROM (0x%X-%X):\n", CART_START, CART_END);
1.1 root 290: show_cpu_area_stats(&cpu_profile.rom);
291:
1.1.1.5 root 292: if (TTmemory && ConfigureParams.Memory.TTRamSize_KB) {
293: fprintf(stderr, "TT-RAM (0x%X-%X):\n", TTRAM_START, TTRAM_START + 1024*ConfigureParams.Memory.TTRamSize_KB);
1.1.1.3 root 294: show_cpu_area_stats(&cpu_profile.ttram);
295: }
296:
1.1 root 297: fprintf(stderr, "\n= %.5fs\n",
1.1.1.5 root 298: (double)cpu_profile.all.cycles / MachineClocks.CPU_Freq_Emul);
1.1.1.3 root 299: }
1.1 root 300:
301: #if ENABLE_WINUAE_CPU
1.1.1.3 root 302: /**
303: * show percentage histogram of given array items
304: */
305: static void show_histogram(const char *title, int count, Uint32 *items)
306: {
1.1.1.5 root 307: const Uint64 maxval = cpu_profile.all.count;
1.1.1.3 root 308: Uint32 value;
309: int i;
310:
1.1.1.5 root 311: fprintf(stderr, "\n%s, number of occurrences:\n", title);
1.1.1.3 root 312: for (i = 0; i < count; i++) {
313: value = items[i];
314: if (value) {
315: int w, width = 50 * value / maxval+1;
316: fprintf(stderr, " %2d: ", i);
317: for (w = 0; w < width; w++) {
318: fputc('#', stderr);
319: }
320: fprintf(stderr, " %.3f%%\n", 100.0 * value / maxval);
1.1 root 321: }
322: }
323: }
324:
325: /**
1.1.1.3 root 326: * show CPU cache usage histograms
327: */
328: void Profile_CpuShowCaches(void)
329: {
330: if (!(cpu_profile.all.i_misses || cpu_profile.all.d_hits)) {
1.1.1.5 root 331: fprintf(stderr, "No instruction/data cache information.\n");
1.1.1.3 root 332: return;
333: }
1.1.1.5 root 334: fprintf(stderr,
335: "\nNote:\n"
336: "- these statistics include all profiled instructions, but\n"
337: "- instruction cache events happen only on prefetch/branch\n"
338: "- data cache events can happen only for instructions that do memory reads\n"
339: "\nAlready prefetched instructions: %.3f%% (no hits/misses)\n",
340: 100.0 * cpu_profile.i_prefetched / cpu_profile.all.count);
341:
1.1.1.3 root 342: show_histogram("Instruction cache hits per instruction",
1.1.1.4 root 343: ARRAY_SIZE(cpu_profile.i_hit_counts), cpu_profile.i_hit_counts);
1.1.1.3 root 344: show_histogram("Instruction cache misses per instruction",
1.1.1.4 root 345: ARRAY_SIZE(cpu_profile.i_miss_counts), cpu_profile.i_miss_counts);
1.1.1.3 root 346: show_histogram("Data cache hits per instruction",
1.1.1.4 root 347: ARRAY_SIZE(cpu_profile.d_hit_counts), cpu_profile.d_hit_counts);
1.1.1.3 root 348: show_histogram("Data cache misses per instruction",
1.1.1.4 root 349: ARRAY_SIZE(cpu_profile.d_miss_counts), cpu_profile.d_miss_counts);
1.1.1.3 root 350: }
351: #else
/* Variant for builds without WinUAE CPU: only tells why there's no data. */
void Profile_CpuShowCaches(void)
{
	fputs("Cache information is recorded only with WinUAE CPU.\n", stderr);
}
355: #endif
356:
357: /**
1.1 root 358: * Show CPU instructions which execution was profiled, in the address order,
359: * starting from the given address. Return next disassembly address.
360: */
1.1.1.5 root 361: Uint32 Profile_CpuShowAddresses(Uint32 lower, Uint32 upper, FILE *out, paging_t use_paging)
1.1 root 362: {
363: int oldcols[DISASM_COLUMNS], newcols[DISASM_COLUMNS];
1.1.1.5 root 364: int show, shown, addrs, active;
1.1 root 365: const char *symbol;
366: cpu_profile_item_t *data;
367: Uint32 idx, end, size;
368: uaecptr nextpc, addr;
369:
370: data = cpu_profile.data;
371: if (!data) {
372: fprintf(stderr, "ERROR: no CPU profiling data available!\n");
373: return 0;
374: }
375:
376: size = cpu_profile.size;
377: active = cpu_profile.active;
378: if (upper) {
379: end = address2index(upper);
380: if (end > size) {
381: end = size;
382: }
383: } else {
384: end = size;
1.1.1.6 ! root 385: }
! 386: show = INT_MAX;
! 387: if (use_paging == PAGING_ENABLED) {
1.1.1.5 root 388: show = DebugUI_GetPageLines(ConfigureParams.Debugger.nDisasmLines, 0);
1.1.1.6 ! root 389: if (!show) {
! 390: show = INT_MAX;
1.1 root 391: }
392: }
393:
394: /* get/change columns */
395: Disasm_GetColumns(oldcols);
396: Disasm_DisableColumn(DISASM_COLUMN_HEXDUMP, oldcols, newcols);
397: Disasm_SetColumns(newcols);
398:
1.1.1.3 root 399: fputs("# disassembly with profile data: <instructions percentage>% (<sum of instructions>, <sum of cycles>, <sum of i-cache misses>, <sum of d-cache hits>)\n", out);
1.1.1.5 root 400: shown = 2; /* first and last printf */
1.1 root 401:
1.1.1.5 root 402: addrs = nextpc = 0;
1.1 root 403: idx = address2index(lower);
1.1.1.6 ! root 404: for (; shown < show && addrs < active && idx < end; idx++) {
1.1 root 405: if (!data[idx].count) {
406: continue;
407: }
408: addr = index2address(idx);
409: if (addr != nextpc && nextpc) {
410: fprintf(out, "[...]\n");
1.1.1.5 root 411: shown++;
1.1 root 412: }
1.1.1.5 root 413: symbol = Symbols_GetByCpuAddress(addr, SYMTYPE_TEXT);
1.1 root 414: if (symbol) {
415: fprintf(out, "%s:\n", symbol);
1.1.1.5 root 416: shown++;
1.1 root 417: }
418: /* NOTE: column setup works only with 68kDisass disasm engine! */
419: Disasm(out, addr, &nextpc, 1);
420: shown++;
1.1.1.5 root 421: addrs++;
1.1 root 422: }
1.1.1.6 ! root 423: if (idx < end) {
! 424: printf("Disassembled %d (of active %d) CPU addresses.\n", addrs, active);
! 425: } else {
! 426: printf("Disassembled last %d (of active %d) CPU addresses, wrapping...\n", addrs, active);
! 427: nextpc = 0;
! 428: }
1.1 root 429: /* restore disassembly columns */
430: Disasm_SetColumns(oldcols);
431: return nextpc;
432: }
433:
434: /**
435: * remove all disassembly columns except instruction ones.
436: * data needed to restore columns is stored to "oldcols"
437: */
438: static void leave_instruction_column(int *oldcols)
439: {
440: int i, newcols[DISASM_COLUMNS];
441:
442: Disasm_GetColumns(oldcols);
443: for (i = 0; i < DISASM_COLUMNS; i++) {
444: if (i == DISASM_COLUMN_OPCODE || i == DISASM_COLUMN_OPERAND) {
445: continue;
446: }
447: Disasm_DisableColumn(i, oldcols, newcols);
448: oldcols = newcols;
449: }
450: Disasm_SetColumns(newcols);
451: }
452:
453: #if ENABLE_WINUAE_CPU
454: /**
455: * compare function for qsort() to sort CPU profile data by instruction cache misses.
456: */
1.1.1.3 root 457: static int cmp_cpu_i_misses(const void *p1, const void *p2)
1.1 root 458: {
1.1.1.3 root 459: Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].i_misses;
460: Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].i_misses;
1.1 root 461: if (count1 > count2) {
462: return -1;
463: }
464: if (count1 < count2) {
465: return 1;
466: }
467: return 0;
468: }
469:
470: /**
471: * Sort CPU profile data addresses by instruction cache misses and show the results.
472: */
1.1.1.3 root 473: void Profile_CpuShowInstrMisses(int show)
474: {
475: int active;
476: int oldcols[DISASM_COLUMNS];
477: Uint32 *sort_arr, *end, addr, nextpc;
478: cpu_profile_item_t *data = cpu_profile.data;
479: float percentage;
480: Uint32 count;
481:
482: if (!cpu_profile.all.i_misses) {
483: fprintf(stderr, "No CPU instruction cache miss information available.\n");
484: return;
485: }
486:
487: active = cpu_profile.active;
488: sort_arr = cpu_profile.sort_arr;
489: qsort(sort_arr, active, sizeof(*sort_arr), cmp_cpu_i_misses);
490:
491: leave_instruction_column(oldcols);
492:
493: printf("addr:\t\ti-cache misses:\n");
494: show = (show < active ? show : active);
495: for (end = sort_arr + show; sort_arr < end; sort_arr++) {
496: addr = index2address(*sort_arr);
497: count = data[*sort_arr].i_misses;
498: percentage = 100.0*count/cpu_profile.all.i_misses;
499: printf("0x%06x\t%5.2f%%\t%d%s\t", addr, percentage, count,
500: count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
501: Disasm(stdout, addr, &nextpc, 1);
502: }
503: printf("%d CPU addresses listed.\n", show);
504:
505: Disasm_SetColumns(oldcols);
506: }
507:
508: /**
509: * compare function for qsort() to sort CPU profile data by data cache hits.
510: */
511: static int cmp_cpu_d_hits(const void *p1, const void *p2)
512: {
513: Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].d_hits;
514: Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].d_hits;
515: if (count1 > count2) {
516: return -1;
517: }
518: if (count1 < count2) {
519: return 1;
520: }
521: return 0;
522: }
523:
524: /**
525: * Sort CPU profile data addresses by data cache hits and show the results.
526: */
527: void Profile_CpuShowDataHits(int show)
1.1 root 528: {
529: int active;
530: int oldcols[DISASM_COLUMNS];
531: Uint32 *sort_arr, *end, addr, nextpc;
532: cpu_profile_item_t *data = cpu_profile.data;
533: float percentage;
534: Uint32 count;
535:
1.1.1.3 root 536: if (!cpu_profile.all.d_hits) {
537: fprintf(stderr, "No CPU data cache hit information available.\n");
1.1 root 538: return;
539: }
540:
541: active = cpu_profile.active;
542: sort_arr = cpu_profile.sort_arr;
1.1.1.3 root 543: qsort(sort_arr, active, sizeof(*sort_arr), cmp_cpu_d_hits);
1.1 root 544:
545: leave_instruction_column(oldcols);
546:
1.1.1.3 root 547: printf("addr:\t\td-cache hits:\n");
1.1 root 548: show = (show < active ? show : active);
549: for (end = sort_arr + show; sort_arr < end; sort_arr++) {
550: addr = index2address(*sort_arr);
1.1.1.3 root 551: count = data[*sort_arr].d_hits;
552: percentage = 100.0*count/cpu_profile.all.d_hits;
1.1 root 553: printf("0x%06x\t%5.2f%%\t%d%s\t", addr, percentage, count,
554: count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
555: Disasm(stdout, addr, &nextpc, 1);
556: }
557: printf("%d CPU addresses listed.\n", show);
558:
559: Disasm_SetColumns(oldcols);
560: }
1.1.1.3 root 561:
1.1 root 562: #else
/* Variant for builds without WinUAE CPU: only tells why there's no data. */
void Profile_CpuShowInstrMisses(int show)
{
	fputs("Cache information is recorded only with WinUAE CPU.\n", stderr);
}
/* Variant for builds without WinUAE CPU: only tells why there's no data. */
void Profile_CpuShowDataHits(int show)
{
	fputs("Cache information is recorded only with WinUAE CPU.\n", stderr);
}
569: #endif
570:
571:
572: /**
573: * compare function for qsort() to sort CPU profile data by cycles counts.
574: */
575: static int cmp_cpu_cycles(const void *p1, const void *p2)
576: {
577: Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].cycles;
578: Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].cycles;
579: if (count1 > count2) {
580: return -1;
581: }
582: if (count1 < count2) {
583: return 1;
584: }
585: return 0;
586: }
587:
588: /**
589: * Sort CPU profile data addresses by cycle counts and show the results.
590: */
591: void Profile_CpuShowCycles(int show)
592: {
593: int active;
594: int oldcols[DISASM_COLUMNS];
595: Uint32 *sort_arr, *end, addr, nextpc;
596: cpu_profile_item_t *data = cpu_profile.data;
597: float percentage;
598: Uint32 count;
599:
600: if (!data) {
601: fprintf(stderr, "ERROR: no CPU profiling data available!\n");
602: return;
603: }
604:
605: active = cpu_profile.active;
606: sort_arr = cpu_profile.sort_arr;
607: qsort(sort_arr, active, sizeof(*sort_arr), cmp_cpu_cycles);
608:
609: leave_instruction_column(oldcols);
610:
611: printf("addr:\t\tcycles:\n");
612: show = (show < active ? show : active);
613: for (end = sort_arr + show; sort_arr < end; sort_arr++) {
614: addr = index2address(*sort_arr);
615: count = data[*sort_arr].cycles;
616: percentage = 100.0*count/cpu_profile.all.cycles;
617: printf("0x%06x\t%5.2f%%\t%d%s\t", addr, percentage, count,
618: count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
619: Disasm(stdout, addr, &nextpc, 1);
620: }
621: printf("%d CPU addresses listed.\n", show);
622:
623: Disasm_SetColumns(oldcols);
624: }
625:
626: /**
627: * compare function for qsort() to sort CPU profile data by descending
628: * address access counts.
629: */
630: static int cmp_cpu_count(const void *p1, const void *p2)
631: {
632: Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].count;
633: Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].count;
634: if (count1 > count2) {
635: return -1;
636: }
637: if (count1 < count2) {
638: return 1;
639: }
640: return 0;
641: }
642:
643: /**
644: * Sort CPU profile data addresses by call counts and show the results.
645: * If symbols are requested and symbols are loaded, show (only) addresses
646: * matching a symbol.
647: */
648: void Profile_CpuShowCounts(int show, bool only_symbols)
649: {
650: cpu_profile_item_t *data = cpu_profile.data;
651: int symbols, matched, active;
652: int oldcols[DISASM_COLUMNS];
653: Uint32 *sort_arr, *end, addr, nextpc;
654: const char *name;
655: float percentage;
656: Uint32 count;
657:
658: if (!data) {
659: fprintf(stderr, "ERROR: no CPU profiling data available!\n");
660: return;
661: }
662: active = cpu_profile.active;
663: show = (show < active ? show : active);
664:
665: sort_arr = cpu_profile.sort_arr;
666: qsort(sort_arr, active, sizeof(*sort_arr), cmp_cpu_count);
667:
668: if (!only_symbols) {
669: leave_instruction_column(oldcols);
670: printf("addr:\t\tcount:\n");
671: for (end = sort_arr + show; sort_arr < end; sort_arr++) {
672: addr = index2address(*sort_arr);
673: count = data[*sort_arr].count;
674: percentage = 100.0*count/cpu_profile.all.count;
675: printf("0x%06x\t%5.2f%%\t%d%s\t",
676: addr, percentage, count,
677: count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
678: Disasm(stdout, addr, &nextpc, 1);
679: }
680: printf("%d CPU addresses listed.\n", show);
681: Disasm_SetColumns(oldcols);
682: return;
683: }
684:
1.1.1.5 root 685: symbols = Symbols_CpuCodeCount();
1.1 root 686: if (!symbols) {
687: fprintf(stderr, "ERROR: no CPU symbols loaded!\n");
688: return;
689: }
690: matched = 0;
691:
692: leave_instruction_column(oldcols);
693:
694: printf("addr:\t\tcount:\t\tsymbol:\n");
695: for (end = sort_arr + active; sort_arr < end; sort_arr++) {
696:
697: addr = index2address(*sort_arr);
1.1.1.5 root 698: name = Symbols_GetByCpuAddress(addr, SYMTYPE_TEXT);
1.1 root 699: if (!name) {
700: continue;
701: }
702: count = data[*sort_arr].count;
703: percentage = 100.0*count/cpu_profile.all.count;
704: printf("0x%06x\t%5.2f%%\t%d\t%s%s\t",
705: addr, percentage, count, name,
706: count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
707: Disasm(stdout, addr, &nextpc, 1);
708:
709: matched++;
710: if (matched >= show || matched >= symbols) {
711: break;
712: }
713: }
714: printf("%d CPU symbols listed.\n", matched);
715:
716: Disasm_SetColumns(oldcols);
717: }
718:
719:
720: static const char * addr2name(Uint32 addr, Uint64 *total)
721: {
722: Uint32 idx = address2index(addr);
723: *total = cpu_profile.data[idx].count;
1.1.1.5 root 724: return Symbols_GetByCpuAddress(addr, SYMTYPE_TEXT);
1.1 root 725: }
726:
727: /**
728: * Output CPU callers info to given file.
729: */
730: void Profile_CpuShowCallers(FILE *fp)
731: {
732: Profile_ShowCallers(fp, cpu_callinfo.sites, cpu_callinfo.site, addr2name);
733: }
734:
735: /**
736: * Save CPU profile information to given file.
737: */
738: void Profile_CpuSave(FILE *out)
739: {
1.1.1.3 root 740: Uint32 text, end;
741: fputs("Field names:\tExecuted instructions, Used cycles, Instruction cache misses, Data cache hits\n", out);
742: /* (Python) regexp that matches address and all described fields from disassembly:
743: * $<hex> : <ASM> <percentage>% (<count>, <cycles>, <i-misses>, <d-hits>)
744: * $e5af38 : rts 0.00% (12, 0, 12, 0)
1.1 root 745: */
746: fputs("Field regexp:\t^\\$([0-9a-f]+) :.*% \\((.*)\\)$\n", out);
747: /* some information for interpreting the addresses */
1.1.1.3 root 748: fprintf(out, "ST_RAM:\t\t0x%06x-0x%06x\n", 0, STRamEnd);
749: end = TosAddress + TosSize;
750: fprintf(out, "ROM_TOS:\t0x%06x-0x%06x\n", TosAddress, end);
751: fprintf(out, "CARTRIDGE:\t0x%06x-0x%06x\n", CART_START, CART_END);
1.1 root 752: text = DebugInfo_GetTEXT();
1.1.1.3 root 753: if (text && (text < TosAddress || text >= TTRAM_START)) {
1.1 root 754: fprintf(out, "PROGRAM_TEXT:\t0x%06x-0x%06x\n", text, DebugInfo_GetTEXTEnd());
755: }
1.1.1.5 root 756: if (TTmemory && ConfigureParams.Memory.TTRamSize_KB) {
757: end = TTRAM_START + 1024*ConfigureParams.Memory.TTRamSize_KB;
1.1.1.3 root 758: fprintf(out, "TT_RAM:\t\t0x%08x-0x%08x\n", TTRAM_START, end);
759: } else if (end < CART_END) {
760: end = CART_END;
761: }
1.1.1.5 root 762: Profile_CpuShowAddresses(0, end-2, out, PAGING_DISABLED);
1.1 root 763: Profile_CpuShowCallers(out);
764: }
765:
766: /* ------------------ CPU profile control ----------------- */
767:
768: /**
769: * Initialize CPU profiling when necessary. Return true if profiling.
770: */
771: bool Profile_CpuStart(void)
772: {
773: int size;
774:
775: Profile_FreeCallinfo(&(cpu_callinfo));
776: if (cpu_profile.sort_arr) {
777: /* remove previous results */
778: free(cpu_profile.sort_arr);
779: free(cpu_profile.data);
780: cpu_profile.sort_arr = NULL;
781: cpu_profile.data = NULL;
782: printf("Freed previous CPU profile buffers.\n");
783: }
784: if (!cpu_profile.enabled) {
785: return false;
786: }
787: /* zero everything */
788: memset(&cpu_profile, 0, sizeof(cpu_profile));
789:
790: /* Shouldn't change within same debug session */
1.1.1.3 root 791: size = (STRamEnd + CART_SIZE + TosSize) / 2;
1.1.1.5 root 792: if (TTmemory && ConfigureParams.Memory.TTRamSize_KB) {
793: size += ConfigureParams.Memory.TTRamSize_KB * 1024/2;
1.1.1.3 root 794: }
1.1 root 795:
796: /* Add one entry for catching invalid PC values */
797: cpu_profile.data = calloc(size + 1, sizeof(*cpu_profile.data));
798: if (!cpu_profile.data) {
799: perror("ERROR, new CPU profile buffer alloc failed");
800: return false;
801: }
802: printf("Allocated CPU profile buffer (%d MB).\n",
803: (int)sizeof(*cpu_profile.data)*size/(1024*1024));
804: cpu_profile.size = size;
805:
1.1.1.5 root 806: Profile_AllocCallinfo(&(cpu_callinfo), Symbols_CpuCodeCount(), "CPU");
1.1 root 807:
808: /* special hack for EmuTOS */
809: etos_switcher = PC_UNDEFINED;
810: if (cpu_callinfo.sites && bIsEmuTOS &&
811: (!Symbols_GetCpuAddress(SYMTYPE_TEXT, "_switchto", &etos_switcher) || etos_switcher < TosAddress)) {
812: etos_switcher = PC_UNDEFINED;
813: }
814:
1.1.1.3 root 815: /* reset cache stats (CPU emulation doesn't do that) */
816: CpuInstruction.D_Cache_hit = 0;
817: CpuInstruction.I_Cache_hit = 0;
818: CpuInstruction.I_Cache_miss = 0;
819: CpuInstruction.D_Cache_miss = 0;
1.1 root 820:
1.1.1.3 root 821: cpu_profile.prev_cycles = CyclesGlobalClockCounter;
822: cpu_profile.prev_family = OpcodeFamily;
823: cpu_profile.prev_pc = M68000_GetPC();
824: if (ConfigureParams.System.bAddressSpace24) {
825: cpu_profile.prev_pc &= 0xffffff;
826: }
1.1.1.2 root 827: cpu_profile.loop_start = PC_UNDEFINED;
828: cpu_profile.loop_end = PC_UNDEFINED;
829: cpu_profile.loop_count = 0;
830: Profile_LoopReset();
831:
1.1 root 832: cpu_profile.disasm_addr = 0;
833: cpu_profile.processed = false;
834: cpu_profile.enabled = true;
835: return cpu_profile.enabled;
836: }
837:
838: /**
839: * return true if pc could be next instruction for previous pc
840: */
841: static bool is_prev_instr(Uint32 prev_pc, Uint32 pc)
842: {
843: /* just moved to next instruction (1-2 words)? */
844: if (prev_pc < pc && (pc - prev_pc) <= 10) {
845: return true;
846: }
847: return false;
848: }
849:
850: /**
851: * return caller instruction type classification
852: */
853: static calltype_t cpu_opcode_type(int family, Uint32 prev_pc, Uint32 pc)
854: {
855: switch (family) {
856:
857: case i_JSR:
858: case i_BSR:
859: return CALL_SUBROUTINE;
860:
861: case i_RTS:
862: case i_RTR:
863: case i_RTD:
864: return CALL_SUBRETURN;
865:
866: case i_JMP: /* often used also for "inlined" function calls... */
867: case i_Bcc: /* both BRA & BCC */
868: case i_FBcc:
869: case i_DBcc:
870: case i_FDBcc:
871: return CALL_BRANCH;
872:
873: case i_TRAP:
874: case i_TRAPV:
875: case i_TRAPcc:
876: case i_FTRAPcc:
877: case i_STOP:
878: case i_ILLG:
879: case i_CHK:
880: case i_CHK2:
881: case i_BKPT:
882: return CALL_EXCEPTION;
883:
884: case i_RTE:
885: return CALL_EXCRETURN;
886: }
887: /* just moved to next instruction? */
888: if (is_prev_instr(prev_pc, pc)) {
889: return CALL_NEXT;
890: }
891: return CALL_UNKNOWN;
892: }
893:
894: /**
895: * If call tracking is enabled (there are symbols), collect
896: * information about subroutine and other calls, and their costs.
897: *
898: * Like with profile data, caller info checks need to be for previous
899: * instruction, that's why "pc" argument for this function actually
900: * needs to be previous PC.
901: */
902: static void collect_calls(Uint32 pc, counters_t *counters)
903: {
904: calltype_t flag;
905: int idx, family;
906: Uint32 prev_pc, caller_pc;
907:
908: family = cpu_profile.prev_family;
909: cpu_profile.prev_family = OpcodeFamily;
910:
911: prev_pc = cpu_callinfo.prev_pc;
912: cpu_callinfo.prev_pc = pc;
913: caller_pc = PC_UNDEFINED;
914:
915: /* address is return address for last subroutine call? */
916: if (unlikely(pc == cpu_callinfo.return_pc) && likely(cpu_callinfo.depth)) {
917:
918: flag = cpu_opcode_type(family, prev_pc, pc);
1.1.1.2 root 919: /* previous address can be exception return (e.g. RTE) instead of RTS,
920: * if exception occurred right after returning from subroutine call.
1.1 root 921: */
922: if (likely(flag == CALL_SUBRETURN || flag == CALL_EXCRETURN)) {
923: caller_pc = Profile_CallEnd(&cpu_callinfo, counters);
924: } else {
925: #if DEBUG
926: /* although at return address, it didn't return yet,
927: * e.g. because there was a jsr or jump to return address
928: */
929: Uint32 nextpc;
1.1.1.2 root 930: fprintf(stderr, "WARNING: subroutine call returned 0x%x -> 0x%x, not through RTS!\n", prev_pc, pc);
1.1 root 931: Disasm(stderr, prev_pc, &nextpc, 1);
932: #endif
933: }
1.1.1.2 root 934: /* next address might be another symbol, so need to fall through */
1.1 root 935: }
936:
937: /* address is one which we're tracking? */
1.1.1.5 root 938: idx = Symbols_GetCpuCodeIndex(pc);
1.1 root 939: if (unlikely(idx >= 0)) {
940:
941: flag = cpu_opcode_type(family, prev_pc, pc);
1.1.1.2 root 942: if (flag == CALL_SUBROUTINE || flag == CALL_EXCEPTION) {
1.1 root 943: /* special HACK for for EmuTOS AES switcher which
944: * changes stack content to remove itself from call
945: * stack and uses RTS for subroutine *calls*, not
946: * for returning from them.
947: *
948: * It wouldn't be reliable to detect calls from it,
949: * so I'm making call *to* it show up as branch, to
950: * keep callstack depth correct.
951: */
952: if (unlikely(pc == etos_switcher)) {
953: flag = CALL_BRANCH;
954: } else if (unlikely(prev_pc == PC_UNDEFINED)) {
955: /* if first profiled instruction
956: * is subroutine call, it doesn't have
957: * valid prev_pc value stored
958: */
959: cpu_callinfo.return_pc = PC_UNDEFINED;
1.1.1.3 root 960: fprintf(stderr, "WARNING: previous PC for tracked address 0x%d is undefined!\n", pc);
1.1 root 961: #if DEBUG
962: skip_assert = true;
963: DebugUI(REASON_CPU_EXCEPTION);
964: #endif
965: } else {
966: /* slow! */
967: cpu_callinfo.return_pc = Disasm_GetNextPC(prev_pc);
968: }
969: } else if (caller_pc != PC_UNDEFINED) {
1.1.1.2 root 970: /* returned from function to first instruction of another symbol:
1.1 root 971: * 0xf384 jsr some_function
972: * other_symbol:
973: * 0f3x8a some_instruction
974: * -> change return instruction address to
975: * address of what did the returned call.
976: */
977: prev_pc = caller_pc;
978: assert(is_prev_instr(prev_pc, pc));
979: flag = CALL_NEXT;
980: }
981: Profile_CallStart(idx, &cpu_callinfo, prev_pc, flag, pc, counters);
982: }
983: }
984:
985: /**
1.1.1.2 root 986: * log last loop info, if there's suitable data for one
987: */
988: static void log_last_loop(void)
989: {
990: unsigned len = cpu_profile.loop_end - cpu_profile.loop_start;
991: if (cpu_profile.loop_count > 1 && (len < profile_loop.cpu_limit || !profile_loop.cpu_limit)) {
992: fprintf(profile_loop.fp, "CPU %d 0x%06x %d %d\n", nVBLs,
993: cpu_profile.loop_start, len, cpu_profile.loop_count);
994: }
995: }
996:
# if DEBUG || ENABLE_WINUAE_CPU
/**
 * Warn about a value that went out of its expected range.
 *
 * Prints the offending value, the largest accepted value and the
 * current address to stderr, followed by disassembly of the previous
 * and current instructions. With DEBUG enabled, also marks that
 * the consistency asserts should be skipped and invokes the debugger.
 *
 * @return largest accepted value (limit - 1), for the caller to use
 *         in place of the out-of-range one
 */
static Uint32 warn_too_large(const char *name, const int value, const int limit, const Uint32 prev_pc, const Uint32 pc)
{
	const int largest_ok = limit - 1;
	Uint32 following;

	fprintf(stderr, "WARNING: unexpected (%d > %d) %s at 0x%x:\n", value, largest_ok, name, pc);
	Disasm(stderr, prev_pc, &following, 1);
	Disasm(stderr, pc, &following, 1);
#if DEBUG
	skip_assert = true;
	DebugUI(REASON_CPU_EXCEPTION);
#endif
	return largest_ok;
}
#endif
1014:
1.1.1.2 root 1015: /**
1.1 root 1016: * Update CPU cycle and count statistics for PC address.
1017: *
1018: * This gets called after instruction has executed and PC
1019: * has advanced to next instruction.
1020: */
1021: void Profile_CpuUpdate(void)
1022: {
1023: counters_t *counters = &(cpu_profile.all);
1.1.1.3 root 1024: Uint32 pc, prev_pc, idx, cycles;
1.1 root 1025: cpu_profile_item_t *prev;
1.1.1.3 root 1026: #if ENABLE_WINUAE_CPU
1027: Uint32 i_hits, d_hits, i_misses, d_misses;
1028: #else
1029: const Uint32 i_misses = 0, d_hits = 0;
1030: #endif
1.1 root 1031:
1032: prev_pc = cpu_profile.prev_pc;
1.1.1.3 root 1033: /* PC may have extra bits when using 24 bit addressing, they need to be masked away as
1.1 root 1034: * emulation itself does that too when PC value is used
1035: */
1.1.1.3 root 1036: cpu_profile.prev_pc = pc = M68000_GetPC();
1037: if (ConfigureParams.System.bAddressSpace24) {
1038: cpu_profile.prev_pc &= 0xffffff;
1039: }
1.1.1.2 root 1040: if (unlikely(profile_loop.fp)) {
1041: if (pc < prev_pc) {
1042: if (pc == cpu_profile.loop_start && prev_pc == cpu_profile.loop_end) {
1043: cpu_profile.loop_count++;
1044: } else {
1045: cpu_profile.loop_start = pc;
1046: cpu_profile.loop_end = prev_pc;
1047: cpu_profile.loop_count = 1;
1048: }
1049: } else {
1050: if (pc > cpu_profile.loop_end) {
1051: log_last_loop();
1.1.1.3 root 1052: cpu_profile.loop_end = 0xffffffff;
1.1.1.2 root 1053: cpu_profile.loop_count = 0;
1054: }
1055: }
1056: }
1057:
1.1 root 1058: idx = address2index(prev_pc);
1059: assert(idx <= cpu_profile.size);
1060: prev = cpu_profile.data + idx;
1061:
1062: if (likely(prev->count < MAX_CPU_PROFILE_VALUE)) {
1063: prev->count++;
1064: }
1065:
1.1.1.3 root 1066: cycles = CyclesGlobalClockCounter - cpu_profile.prev_cycles;
1067: cpu_profile.prev_cycles = CyclesGlobalClockCounter;
1.1 root 1068:
1069: if (likely(prev->cycles < MAX_CPU_PROFILE_VALUE - cycles)) {
1070: prev->cycles += cycles;
1071: } else {
1072: prev->cycles = MAX_CPU_PROFILE_VALUE;
1073: }
1074:
1075: #if ENABLE_WINUAE_CPU
1.1.1.3 root 1076: /* only WinUAE CPU core provides cache information */
1077: i_hits = CpuInstruction.I_Cache_hit;
1078: d_hits = CpuInstruction.D_Cache_hit;
1079: i_misses = CpuInstruction.I_Cache_miss;
1080: d_misses = CpuInstruction.D_Cache_miss;
1081:
1082: /* reset cache stats after reading them (for the next instruction) */
1083: CpuInstruction.I_Cache_hit = 0;
1084: CpuInstruction.D_Cache_hit = 0;
1085: CpuInstruction.I_Cache_miss = 0;
1086: CpuInstruction.D_Cache_miss = 0;
1087:
1088: /* tracked for every address */
1.1.1.5 root 1089: # if DEBUG_CACHE
1090: if (likely(prev->i_hits < MAX_CPU_PROFILE_VALUE - i_hits)) {
1091: prev->i_hits += i_hits;
1092: } else {
1093: prev->i_hits = MAX_CPU_PROFILE_VALUE;
1094: }
1095: if (likely(prev->d_misses < MAX_CPU_PROFILE_VALUE - d_misses)) {
1096: prev->d_misses += d_misses;
1097: } else {
1098: prev->d_misses = MAX_CPU_PROFILE_VALUE;
1099: }
1100: # endif
1.1.1.3 root 1101: if (likely(prev->i_misses < MAX_CPU_PROFILE_VALUE - i_misses)) {
1102: prev->i_misses += i_misses;
1.1 root 1103: } else {
1.1.1.3 root 1104: prev->i_misses = MAX_CPU_PROFILE_VALUE;
1.1 root 1105: }
1.1.1.3 root 1106: if (likely(prev->d_hits < MAX_CPU_PROFILE_VALUE - d_hits)) {
1107: prev->d_hits += d_hits;
1108: } else {
1109: prev->d_hits = MAX_CPU_PROFILE_VALUE;
1110: }
1111:
1112: /* tracking for histogram, check for array overflows */
1.1.1.5 root 1113: if (!(i_hits || i_misses)) {
1114: cpu_profile.i_prefetched++;
1115: }
1.1.1.3 root 1116: if (unlikely(i_hits >= MAX_I_HITS)) {
1117: i_hits = warn_too_large("number of CPU instruction cache hits", i_hits, MAX_I_HITS, prev_pc, pc);
1118: }
1119: cpu_profile.i_hit_counts[i_hits]++;
1120:
1121: if (unlikely(i_misses >= MAX_I_MISSES)) {
1122: i_misses = warn_too_large("number of CPU instruction cache misses", i_misses, MAX_I_MISSES, prev_pc, pc);
1123: }
1124: cpu_profile.i_miss_counts[i_misses]++;
1125:
1126: if (unlikely(d_hits >= MAX_D_HITS)) {
1127: d_hits = warn_too_large("number of CPU data cache hits", d_hits, MAX_D_HITS, prev_pc, pc);
1128: }
1129: cpu_profile.d_hit_counts[d_hits]++;
1130:
1131: if (unlikely(d_misses >= MAX_D_MISSES)) {
1132: d_misses = warn_too_large("number of CPU data cache misses", d_misses, MAX_D_MISSES, prev_pc, pc);
1133: }
1134: cpu_profile.d_miss_counts[d_misses]++;
1.1.1.5 root 1135: #endif /* ENABLE_WINUAE_CPU */
1.1.1.3 root 1136:
1.1 root 1137: if (cpu_callinfo.sites) {
1138: collect_calls(prev_pc, counters);
1139: }
1.1.1.5 root 1140: /* total counters are increased after caller info is processed,
1.1 root 1141: * otherwise cost for the instruction calling the callee
1142: * doesn't get accounted to caller (but callee).
1143: */
1144: counters->count++;
1.1.1.3 root 1145: counters->cycles += cycles;
1146: counters->i_misses += i_misses;
1147: counters->d_hits += d_hits;
1.1 root 1148:
1149: #if DEBUG
1150: if (unlikely(OpcodeFamily == 0)) {
1151: Uint32 nextpc;
1152: fputs("WARNING: instruction opcode family is zero (=i_ILLG) for instruction:\n", stderr);
1153: Disasm(stderr, prev_pc, &nextpc, 1);
1154: }
1155: /* catch too large (and negative) cycles for other than STOP instruction */
1156: if (unlikely(cycles > 512 && OpcodeFamily != i_STOP)) {
1.1.1.3 root 1157: warn_too_large("cycles", cycles, 512, prev_pc, pc);
1.1 root 1158: }
1.1.1.3 root 1159: # if !ENABLE_WINUAE_CPU
1160: {
1161: static Uint32 prev_cycles = 0, prev_pc2 = 0;
1162: if (unlikely(cycles == 0 && prev_cycles == 0)) {
1163: Uint32 nextpc;
1164: fputs("WARNING: Zero cycles for successive opcodes:\n", stderr);
1165: Disasm(stderr, prev_pc2, &nextpc, 1);
1166: Disasm(stderr, prev_pc, &nextpc, 1);
1167: }
1168: prev_cycles = cycles;
1169: prev_pc2 = prev_pc;
1.1 root 1170: }
1.1.1.3 root 1171: # endif
1.1 root 1172: #endif
1173: }
1174:
1175:
1176: /**
1177: * Helper for accounting CPU profile area item.
1178: */
1179: static void update_area_item(profile_area_t *area, Uint32 addr, cpu_profile_item_t *item)
1180: {
1181: Uint32 cycles = item->cycles;
1182: Uint32 count = item->count;
1183:
1184: if (!count) {
1185: return;
1186: }
1187: area->counters.count += count;
1188: area->counters.cycles += cycles;
1.1.1.3 root 1189: area->counters.i_misses += item->i_misses;
1190: area->counters.d_hits += item->d_hits;
1.1 root 1191:
1192: if (cycles == MAX_CPU_PROFILE_VALUE) {
1193: area->overflow = true;
1194: }
1195: if (addr < area->lowest) {
1196: area->lowest = addr;
1197: }
1198: area->highest = addr;
1199:
1200: area->active++;
1201: }
1202:
1203: /**
1204: * Helper for collecting CPU profile area statistics.
1205: */
1206: static Uint32 update_area(profile_area_t *area, Uint32 start, Uint32 end)
1207: {
1208: cpu_profile_item_t *item;
1209: Uint32 addr;
1210:
1211: memset(area, 0, sizeof(profile_area_t));
1212: area->lowest = cpu_profile.size;
1213:
1214: item = &(cpu_profile.data[start]);
1215: for (addr = start; addr < end; addr++, item++) {
1216: update_area_item(area, addr, item);
1217: }
1218: return addr;
1219: }
1220:
1221: /**
1222: * Helper for initializing CPU profile area sorting indexes.
1223: */
1224: static Uint32* index_area(profile_area_t *area, Uint32 *sort_arr)
1225: {
1226: cpu_profile_item_t *item;
1227: Uint32 addr;
1228:
1229: item = &(cpu_profile.data[area->lowest]);
1230: for (addr = area->lowest; addr <= area->highest; addr++, item++) {
1231: if (item->count) {
1232: *sort_arr++ = addr;
1233: }
1234: }
1235: return sort_arr;
1236: }
1237:
1238: /**
1239: * Stop and process the CPU profiling data; collect stats and
1240: * prepare for more optimal sorting.
1241: */
1242: void Profile_CpuStop(void)
1243: {
1244: Uint32 *sort_arr, next;
1.1.1.3 root 1245: unsigned int size, stsize;
1.1 root 1246: int active;
1247:
1248: if (cpu_profile.processed || !cpu_profile.enabled) {
1249: return;
1250: }
1.1.1.2 root 1251:
1252: log_last_loop();
1253: if (profile_loop.fp) {
1254: fflush(profile_loop.fp);
1255: }
1256:
1.1 root 1257: /* user didn't change RAM or TOS size in the meanwhile? */
1.1.1.3 root 1258: size = stsize = (STRamEnd + CART_SIZE + TosSize) / 2;
1.1.1.5 root 1259: if (TTmemory && ConfigureParams.Memory.TTRamSize_KB) {
1260: size += ConfigureParams.Memory.TTRamSize_KB * 1024/2;
1.1.1.3 root 1261: }
1262: assert(cpu_profile.size == size);
1.1 root 1263:
1264: Profile_FinalizeCalls(&(cpu_callinfo), &(cpu_profile.all), Symbols_GetByCpuAddress);
1265:
1266: /* find lowest and highest addresses executed etc */
1267: next = update_area(&cpu_profile.ram, 0, STRamEnd/2);
1.1.1.3 root 1268: if (TosAddress < CART_START) {
1269: next = update_area(&cpu_profile.tos, next, (STRamEnd + TosSize)/2);
1270: next = update_area(&cpu_profile.rom, next, stsize);
1271: } else {
1272: next = update_area(&cpu_profile.rom, next, (STRamEnd + CART_SIZE)/2);
1273: next = update_area(&cpu_profile.tos, next, stsize);
1274: }
1275: next = update_area(&cpu_profile.ttram, next, size);
1276: assert(next == size);
1.1 root 1277:
1278: #if DEBUG
1279: if (skip_assert) {
1280: skip_assert = false;
1281: } else
1282: #endif
1283: {
1.1.1.3 root 1284: #if DEBUG
1285: if (cpu_profile.all.count != cpu_profile.ttram.counters.count + cpu_profile.ram.counters.count + cpu_profile.tos.counters.count + cpu_profile.rom.counters.count) {
1286: fprintf(stderr, "ERROR, instruction count mismatch:\n\t%"PRIu64" != %"PRIu64" + %"PRIu64" + %"PRIu64" + %"PRIu64"?\n",
1287: cpu_profile.all.count, cpu_profile.ttram.counters.count, cpu_profile.ram.counters.count,
1288: cpu_profile.tos.counters.count, cpu_profile.rom.counters.count);
1289: fprintf(stderr, "If there was debugger invocation from profiling before this, try with profiler DEBUG define disabled!!!\n");
1290: }
1291: #endif
1292: assert(cpu_profile.all.count == cpu_profile.ttram.counters.count + cpu_profile.ram.counters.count + cpu_profile.tos.counters.count + cpu_profile.rom.counters.count);
1293: assert(cpu_profile.all.cycles == cpu_profile.ttram.counters.cycles + cpu_profile.ram.counters.cycles + cpu_profile.tos.counters.cycles + cpu_profile.rom.counters.cycles);
1294: assert(cpu_profile.all.i_misses == cpu_profile.ttram.counters.i_misses + cpu_profile.ram.counters.i_misses + cpu_profile.tos.counters.i_misses + cpu_profile.rom.counters.i_misses);
1295: assert(cpu_profile.all.d_hits == cpu_profile.ttram.counters.d_hits + cpu_profile.ram.counters.d_hits + cpu_profile.tos.counters.d_hits + cpu_profile.rom.counters.d_hits);
1.1 root 1296: }
1297:
1298: /* allocate address array for sorting */
1.1.1.3 root 1299: active = cpu_profile.ttram.active + cpu_profile.ram.active + cpu_profile.rom.active + cpu_profile.tos.active;
1.1 root 1300: sort_arr = calloc(active, sizeof(*sort_arr));
1301:
1302: if (!sort_arr) {
1303: perror("ERROR: allocating CPU profile address data");
1304: free(cpu_profile.data);
1305: cpu_profile.data = NULL;
1306: return;
1307: }
1308: printf("Allocated CPU profile address buffer (%d KB).\n",
1309: (int)sizeof(*sort_arr)*(active+512)/1024);
1310: cpu_profile.sort_arr = sort_arr;
1311: cpu_profile.active = active;
1312:
1313: /* and fill addresses for used instructions... */
1314: sort_arr = index_area(&cpu_profile.ram, sort_arr);
1315: sort_arr = index_area(&cpu_profile.tos, sort_arr);
1316: sort_arr = index_area(&cpu_profile.rom, sort_arr);
1.1.1.3 root 1317: sort_arr = index_area(&cpu_profile.ttram, sort_arr);
1.1 root 1318: assert(sort_arr == cpu_profile.sort_arr + cpu_profile.active);
1319: //printf("%d/%d/%d\n", area->active, sort_arr-cpu_profile.sort_arr, active);
1320:
1321: Profile_CpuShowStats();
1322: cpu_profile.processed = true;
1323: }
1324:
1325: /**
1326: * Get pointers to CPU profile enabling and disasm address variables
1327: * for updating them (in parser).
1328: */
1329: void Profile_CpuGetPointers(bool **enabled, Uint32 **disasm_addr)
1330: {
1331: *disasm_addr = &cpu_profile.disasm_addr;
1332: *enabled = &cpu_profile.enabled;
1333: }
1334:
1335: /**
1336: * Get callinfo & symbol search pointers for stack walking.
1337: */
1.1.1.5 root 1338: void Profile_CpuGetCallinfo(callinfo_t **callinfo, const char* (**get_symbol)(Uint32, symtype_t))
1.1 root 1339: {
1340: *callinfo = &(cpu_callinfo);
1341: *get_symbol = Symbols_GetByCpuAddress;
1342: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.