|
|
1.1 root 1: /*
2: * Hatari - profilecpu.c
3: *
1.1.1.3 root 4: * Copyright (C) 2010-2015 by Eero Tamminen
1.1 root 5: *
6: * This file is distributed under the GNU General Public License, version 2
7: * or at your option any later version. Read the file gpl.txt for details.
8: *
9: * profilecpu.c - functions for profiling CPU and showing the results.
10: */
/* Identification string for this source file, including the compile date and time */
const char Profilecpu_fileid[] =
	"Hatari profilecpu.c : " __DATE__ " " __TIME__;
12:
13: #include <stdio.h>
14: #include <inttypes.h>
15: #include <assert.h>
16: #include "main.h"
17: #include "configuration.h"
18: #include "clocks_timings.h"
19: #include "debugInfo.h"
20: #include "dsp.h"
21: #include "m68000.h"
22: #include "68kDisass.h"
23: #include "profile.h"
24: #include "profile_priv.h"
25: #include "stMemory.h"
26: #include "symbols.h"
27: #include "tos.h"
1.1.1.2 root 28: #include "screen.h"
29: #include "video.h"
30:
31:
32: /* cartridge area */
33: #define CART_START 0xFA0000
34: #define CART_END 0xFC0000
35: #define CART_SIZE (CART_END - CART_START)
36:
1.1.1.3 root 37: #define TTRAM_START 0x01000000
1.1 root 38:
39: /* if non-zero, output (more) warnings on suspicious:
40: * - cycle/instruction counts
41: * - PC switches
1.1.1.3 root 42: * And drop to debugger on invalid current & previous PC addresses.
43: *
44: * NOTE: DebugUI() calls that DEBUG define enables, can cause
45: * instruction count mismatch assertions because debugger invocation
46: * resets the counters AND happens in middle of data collection.
47: * It's best to quit after debugging the issue ('q' command).
1.1 root 48: */
49: #define DEBUG 0
50: #if DEBUG
51: #include "debugui.h"
52: static bool skip_assert;
53: #endif
54:
55: static callinfo_t cpu_callinfo;
56:
57: #define MAX_CPU_PROFILE_VALUE 0xFFFFFFFF
58:
59: typedef struct {
60: Uint32 count; /* how many times this address instrcution is executed */
61: Uint32 cycles; /* how many CPU cycles was taken at this address */
1.1.1.3 root 62: Uint32 i_misses; /* how many CPU instruction cache misses happened at this address */
63: Uint32 d_hits; /* how many CPU data cache hits happened at this address */
1.1 root 64: } cpu_profile_item_t;
65:
1.1.1.3 root 66: #define MAX_I_HITS 8
67: #define MAX_I_MISSES 8
68: #define MAX_D_HITS 32
69: #define MAX_D_MISSES 20
1.1 root 70:
71: static struct {
72: counters_t all; /* total counts for all areas */
73: cpu_profile_item_t *data; /* profile data items */
74: Uint32 size; /* number of allocated profile data items */
1.1.1.3 root 75: profile_area_t ttram; /* TT-RAM stats */
1.1 root 76: profile_area_t ram; /* normal RAM stats */
77: profile_area_t rom; /* cartridge ROM stats */
78: profile_area_t tos; /* ROM TOS stats */
79: int active; /* number of active data items in all areas */
80: Uint32 *sort_arr; /* data indexes used for sorting */
1.1.1.2 root 81: int prev_family; /* previous instruction opcode family */
1.1.1.3 root 82: Uint64 prev_cycles; /* previous instruction cycles counter */
1.1 root 83: Uint32 prev_pc; /* previous instruction address */
1.1.1.2 root 84: Uint32 loop_start; /* address of last loop start */
85: Uint32 loop_end; /* address of last loop end */
86: Uint32 loop_count; /* how many times it was looped */
1.1 root 87: Uint32 disasm_addr; /* 'addresses' command start address */
1.1.1.3 root 88: #if ENABLE_WINUAE_CPU
89: Uint32 i_hit_counts[MAX_I_HITS]; /* I-cache hit counts */
90: Uint32 d_hit_counts[MAX_D_HITS]; /* D-cache hit counts */
91: Uint32 i_miss_counts[MAX_I_MISSES]; /* I-cache miss counts */
92: Uint32 d_miss_counts[MAX_D_MISSES]; /* D-cache miss counts */
93: #endif
1.1 root 94: bool processed; /* true when data is already processed */
95: bool enabled; /* true when profiling enabled */
96: } cpu_profile;
97:
98: /* special hack for EmuTOS */
99: static Uint32 etos_switcher;
100:
101:
102: /* ------------------ CPU profile address mapping ----------------- */
103:
104: /**
105: * convert Atari memory address to sorting array profile data index.
106: */
107: static inline Uint32 address2index(Uint32 pc)
108: {
109: if (unlikely(pc & 1)) {
110: fprintf(stderr, "WARNING: odd CPU profile instruction address 0x%x!\n", pc);
111: #if DEBUG
112: skip_assert = true;
113: DebugUI(REASON_CPU_EXCEPTION);
114: #endif
115: }
1.1.1.2 root 116: if (pc < STRamEnd) {
117: /* most likely case, use RAM address as-is */
118:
119: } else if (pc >= TosAddress && pc < TosAddress + TosSize) {
1.1 root 120: /* TOS, put it after RAM data */
121: pc = pc - TosAddress + STRamEnd;
1.1.1.2 root 122: if (TosAddress >= CART_END) {
123: /* and after cartridge data as it's higher */
124: pc += CART_SIZE;
125: }
126: } else if (pc >= CART_START && pc < CART_END) {
127: /* ROM, put it after RAM data */
128: pc = pc - CART_START + STRamEnd;
129: if (TosAddress < CART_START) {
130: /* and after TOS as it's higher */
131: pc += TosSize;
132: }
1.1.1.3 root 133: #if ENABLE_WINUAE_CPU
134: } else if (TTmemory && pc >= TTRAM_START && pc < TTRAM_START + 1024*1024*(unsigned)ConfigureParams.Memory.nTTRamSize) {
135: pc += STRamEnd + TosSize + CART_SIZE - TTRAM_START;
136: #endif
1.1 root 137: } else {
1.1.1.2 root 138: fprintf(stderr, "WARNING: 'invalid' CPU PC profile instruction address 0x%x!\n", pc);
139: /* extra entry at end is reserved for invalid PC values */
1.1.1.3 root 140: pc = STRamEnd + TosSize + CART_SIZE;
1.1 root 141: #if DEBUG
1.1.1.2 root 142: skip_assert = true;
143: DebugUI(REASON_CPU_EXCEPTION);
1.1 root 144: #endif
145: }
146: /* CPU instructions are at even addresses, save space by halving */
147: return (pc >> 1);
148: }
149:
150: /**
151: * convert sorting array profile data index to Atari memory address.
152: */
153: static Uint32 index2address(Uint32 idx)
154: {
155: idx <<= 1;
156: /* RAM */
157: if (idx < STRamEnd) {
158: return idx;
159: }
160: idx -= STRamEnd;
1.1.1.2 root 161: /* TOS before cartridge area? */
162: if (TosAddress < CART_START) {
163: /* TOS */
164: if (idx < TosSize) {
165: return idx + TosAddress;
166: }
167: idx -= TosSize;
168: /* ROM */
1.1.1.3 root 169: if (idx < CART_SIZE) {
170: return idx + CART_START;
171: }
172: idx -= CART_SIZE;
1.1.1.2 root 173: } else {
174: /* ROM */
175: if (idx < CART_SIZE) {
176: return idx + CART_START;
177: }
178: idx -= CART_SIZE;
179: /* TOS */
1.1.1.3 root 180: if (idx < TosSize) {
181: return idx + TosAddress;
182: }
183: idx -= TosSize;
1.1 root 184: }
1.1.1.3 root 185: return idx + TTRAM_START;
1.1 root 186: }
187:
188: /* ------------------ CPU profile results ----------------- */
189:
190: /**
191: * Get CPU cycles, count and count percentage for given address.
192: * Return true if data was available and non-zero, false otherwise.
193: */
1.1.1.3 root 194: bool Profile_CpuAddressData(Uint32 addr, float *percentage, Uint32 *count, Uint32 *cycles, Uint32 *i_misses, Uint32 *d_hits)
1.1 root 195: {
196: Uint32 idx;
197: if (!cpu_profile.data) {
198: return false;
199: }
200: idx = address2index(addr);
1.1.1.3 root 201: *i_misses = cpu_profile.data[idx].i_misses;
202: *d_hits = cpu_profile.data[idx].d_hits;
1.1 root 203: *cycles = cpu_profile.data[idx].cycles;
204: *count = cpu_profile.data[idx].count;
205: if (cpu_profile.all.count) {
206: *percentage = 100.0*(*count)/cpu_profile.all.count;
207: } else {
208: *percentage = 0.0;
209: }
210: return (*count > 0);
211: }
212:
213: /**
214: * Helper to show statistics for specified CPU profile area.
215: */
216: static void show_cpu_area_stats(profile_area_t *area)
217: {
218: if (!area->active) {
219: fprintf(stderr, "- no activity\n");
220: return;
221: }
222: fprintf(stderr, "- active address range:\n 0x%06x-0x%06x\n",
223: index2address(area->lowest),
224: index2address(area->highest));
225: fprintf(stderr, "- active instruction addresses:\n %d (%.2f%% of all)\n",
226: area->active,
227: 100.0 * area->active / cpu_profile.active);
228: fprintf(stderr, "- executed instructions:\n %"PRIu64" (%.2f%% of all)\n",
229: area->counters.count,
230: 100.0 * area->counters.count / cpu_profile.all.count);
1.1.1.3 root 231: /* CPU cache in use? */
232: if (cpu_profile.all.i_misses) {
1.1 root 233: fprintf(stderr, "- instruction cache misses:\n %"PRIu64" (%.2f%% of all)\n",
1.1.1.3 root 234: area->counters.i_misses,
235: 100.0 * area->counters.i_misses / cpu_profile.all.i_misses);
236: }
237: if (cpu_profile.all.d_hits) {
238: fprintf(stderr, "- data cache hits:\n %"PRIu64" (%.2f%% of all)\n",
239: area->counters.d_hits,
240: 100.0 * area->counters.d_hits / cpu_profile.all.d_hits);
1.1 root 241: }
242: fprintf(stderr, "- used cycles:\n %"PRIu64" (%.2f%% of all)\n = %.5fs\n",
243: area->counters.cycles,
244: 100.0 * area->counters.cycles / cpu_profile.all.cycles,
245: (double)area->counters.cycles / MachineClocks.CPU_Freq);
246: if (area->overflow) {
247: fprintf(stderr, " *** COUNTER OVERFLOW! ***\n");
248: }
249: }
250:
251:
252: /**
253: * show CPU area (RAM, ROM, TOS) specific statistics.
254: */
255: void Profile_CpuShowStats(void)
256: {
257: fprintf(stderr, "Normal RAM (0-0x%X):\n", STRamEnd);
258: show_cpu_area_stats(&cpu_profile.ram);
259:
260: fprintf(stderr, "ROM TOS (0x%X-0x%X):\n", TosAddress, TosAddress + TosSize);
261: show_cpu_area_stats(&cpu_profile.tos);
262:
1.1.1.2 root 263: fprintf(stderr, "Cartridge ROM (0x%X-%X):\n", CART_START, CART_END);
1.1 root 264: show_cpu_area_stats(&cpu_profile.rom);
265:
1.1.1.3 root 266: if (TTmemory && ConfigureParams.Memory.nTTRamSize) {
267: fprintf(stderr, "TT-RAM (0x%X-%X):\n", TTRAM_START, TTRAM_START + 1024*1024*ConfigureParams.Memory.nTTRamSize);
268: show_cpu_area_stats(&cpu_profile.ttram);
269: }
270:
1.1 root 271: fprintf(stderr, "\n= %.5fs\n",
272: (double)cpu_profile.all.cycles / MachineClocks.CPU_Freq);
1.1.1.3 root 273: }
1.1 root 274:
275: #if ENABLE_WINUAE_CPU
1.1.1.3 root 276: /**
277: * show percentage histogram of given array items
278: */
279: static void show_histogram(const char *title, int count, Uint32 *items)
280: {
281: Uint64 maxval;
282: Uint32 value;
283: int i;
284:
285: fprintf(stderr, "\n%s, number of occurrencies:\n", title);
286: maxval = 0;
287: for (i = 0; i < count; i++) {
288: maxval += items[i];
289: }
290: for (i = 0; i < count; i++) {
291: value = items[i];
292: if (value) {
293: int w, width = 50 * value / maxval+1;
294: fprintf(stderr, " %2d: ", i);
295: for (w = 0; w < width; w++) {
296: fputc('#', stderr);
297: }
298: fprintf(stderr, " %.3f%%\n", 100.0 * value / maxval);
1.1 root 299: }
300: }
301: }
302:
303: /**
1.1.1.3 root 304: * show CPU cache usage histograms
305: */
306: void Profile_CpuShowCaches(void)
307: {
308: if (!(cpu_profile.all.i_misses || cpu_profile.all.d_hits)) {
309: fprintf(stderr, "No instruction/data cache information.");
310: return;
311: }
312: show_histogram("Instruction cache hits per instruction",
1.1.1.4 ! root 313: ARRAY_SIZE(cpu_profile.i_hit_counts), cpu_profile.i_hit_counts);
1.1.1.3 root 314: show_histogram("Instruction cache misses per instruction",
1.1.1.4 ! root 315: ARRAY_SIZE(cpu_profile.i_miss_counts), cpu_profile.i_miss_counts);
1.1.1.3 root 316: show_histogram("Data cache hits per instruction",
1.1.1.4 ! root 317: ARRAY_SIZE(cpu_profile.d_hit_counts), cpu_profile.d_hit_counts);
1.1.1.3 root 318: show_histogram("Data cache misses per instruction",
1.1.1.4 ! root 319: ARRAY_SIZE(cpu_profile.d_miss_counts), cpu_profile.d_miss_counts);
1.1.1.3 root 320: }
321: #else
/* Variant for builds without WinUAE CPU core: just tells that there's no cache data */
void Profile_CpuShowCaches(void)
{
	fputs("Cache information is recorded only with WinUAE CPU.\n", stderr);
}
325: #endif
326:
327: /**
1.1 root 328: * Show CPU instructions which execution was profiled, in the address order,
329: * starting from the given address. Return next disassembly address.
330: */
331: Uint32 Profile_CpuShowAddresses(Uint32 lower, Uint32 upper, FILE *out)
332: {
333: int oldcols[DISASM_COLUMNS], newcols[DISASM_COLUMNS];
334: int show, shown, active;
335: const char *symbol;
336: cpu_profile_item_t *data;
337: Uint32 idx, end, size;
338: uaecptr nextpc, addr;
339:
340: data = cpu_profile.data;
341: if (!data) {
342: fprintf(stderr, "ERROR: no CPU profiling data available!\n");
343: return 0;
344: }
345:
346: size = cpu_profile.size;
347: active = cpu_profile.active;
348: if (upper) {
349: end = address2index(upper);
350: show = active;
351: if (end > size) {
352: end = size;
353: }
354: } else {
355: end = size;
356: show = ConfigureParams.Debugger.nDisasmLines;
357: if (!show || show > active) {
358: show = active;
359: }
360: }
361:
362: /* get/change columns */
363: Disasm_GetColumns(oldcols);
364: Disasm_DisableColumn(DISASM_COLUMN_HEXDUMP, oldcols, newcols);
365: Disasm_SetColumns(newcols);
366:
1.1.1.3 root 367: fputs("# disassembly with profile data: <instructions percentage>% (<sum of instructions>, <sum of cycles>, <sum of i-cache misses>, <sum of d-cache hits>)\n", out);
1.1 root 368:
369: nextpc = 0;
370: idx = address2index(lower);
371: for (shown = 0; shown < show && idx < end; idx++) {
372: if (!data[idx].count) {
373: continue;
374: }
375: addr = index2address(idx);
376: if (addr != nextpc && nextpc) {
377: fprintf(out, "[...]\n");
378: }
379: symbol = Symbols_GetByCpuAddress(addr);
380: if (symbol) {
381: fprintf(out, "%s:\n", symbol);
382: }
383: /* NOTE: column setup works only with 68kDisass disasm engine! */
384: Disasm(out, addr, &nextpc, 1);
385: shown++;
386: }
387: printf("Disassembled %d (of active %d) CPU addresses.\n", shown, active);
388:
389: /* restore disassembly columns */
390: Disasm_SetColumns(oldcols);
391: return nextpc;
392: }
393:
394: /**
395: * remove all disassembly columns except instruction ones.
396: * data needed to restore columns is stored to "oldcols"
397: */
398: static void leave_instruction_column(int *oldcols)
399: {
400: int i, newcols[DISASM_COLUMNS];
401:
402: Disasm_GetColumns(oldcols);
403: for (i = 0; i < DISASM_COLUMNS; i++) {
404: if (i == DISASM_COLUMN_OPCODE || i == DISASM_COLUMN_OPERAND) {
405: continue;
406: }
407: Disasm_DisableColumn(i, oldcols, newcols);
408: oldcols = newcols;
409: }
410: Disasm_SetColumns(newcols);
411: }
412:
413: #if ENABLE_WINUAE_CPU
414: /**
415: * compare function for qsort() to sort CPU profile data by instruction cache misses.
416: */
1.1.1.3 root 417: static int cmp_cpu_i_misses(const void *p1, const void *p2)
1.1 root 418: {
1.1.1.3 root 419: Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].i_misses;
420: Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].i_misses;
1.1 root 421: if (count1 > count2) {
422: return -1;
423: }
424: if (count1 < count2) {
425: return 1;
426: }
427: return 0;
428: }
429:
430: /**
431: * Sort CPU profile data addresses by instruction cache misses and show the results.
432: */
1.1.1.3 root 433: void Profile_CpuShowInstrMisses(int show)
434: {
435: int active;
436: int oldcols[DISASM_COLUMNS];
437: Uint32 *sort_arr, *end, addr, nextpc;
438: cpu_profile_item_t *data = cpu_profile.data;
439: float percentage;
440: Uint32 count;
441:
442: if (!cpu_profile.all.i_misses) {
443: fprintf(stderr, "No CPU instruction cache miss information available.\n");
444: return;
445: }
446:
447: active = cpu_profile.active;
448: sort_arr = cpu_profile.sort_arr;
449: qsort(sort_arr, active, sizeof(*sort_arr), cmp_cpu_i_misses);
450:
451: leave_instruction_column(oldcols);
452:
453: printf("addr:\t\ti-cache misses:\n");
454: show = (show < active ? show : active);
455: for (end = sort_arr + show; sort_arr < end; sort_arr++) {
456: addr = index2address(*sort_arr);
457: count = data[*sort_arr].i_misses;
458: percentage = 100.0*count/cpu_profile.all.i_misses;
459: printf("0x%06x\t%5.2f%%\t%d%s\t", addr, percentage, count,
460: count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
461: Disasm(stdout, addr, &nextpc, 1);
462: }
463: printf("%d CPU addresses listed.\n", show);
464:
465: Disasm_SetColumns(oldcols);
466: }
467:
468: /**
469: * compare function for qsort() to sort CPU profile data by data cache hits.
470: */
471: static int cmp_cpu_d_hits(const void *p1, const void *p2)
472: {
473: Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].d_hits;
474: Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].d_hits;
475: if (count1 > count2) {
476: return -1;
477: }
478: if (count1 < count2) {
479: return 1;
480: }
481: return 0;
482: }
483:
484: /**
485: * Sort CPU profile data addresses by data cache hits and show the results.
486: */
487: void Profile_CpuShowDataHits(int show)
1.1 root 488: {
489: int active;
490: int oldcols[DISASM_COLUMNS];
491: Uint32 *sort_arr, *end, addr, nextpc;
492: cpu_profile_item_t *data = cpu_profile.data;
493: float percentage;
494: Uint32 count;
495:
1.1.1.3 root 496: if (!cpu_profile.all.d_hits) {
497: fprintf(stderr, "No CPU data cache hit information available.\n");
1.1 root 498: return;
499: }
500:
501: active = cpu_profile.active;
502: sort_arr = cpu_profile.sort_arr;
1.1.1.3 root 503: qsort(sort_arr, active, sizeof(*sort_arr), cmp_cpu_d_hits);
1.1 root 504:
505: leave_instruction_column(oldcols);
506:
1.1.1.3 root 507: printf("addr:\t\td-cache hits:\n");
1.1 root 508: show = (show < active ? show : active);
509: for (end = sort_arr + show; sort_arr < end; sort_arr++) {
510: addr = index2address(*sort_arr);
1.1.1.3 root 511: count = data[*sort_arr].d_hits;
512: percentage = 100.0*count/cpu_profile.all.d_hits;
1.1 root 513: printf("0x%06x\t%5.2f%%\t%d%s\t", addr, percentage, count,
514: count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
515: Disasm(stdout, addr, &nextpc, 1);
516: }
517: printf("%d CPU addresses listed.\n", show);
518:
519: Disasm_SetColumns(oldcols);
520: }
1.1.1.3 root 521:
1.1 root 522: #else
/* Variant for builds without WinUAE CPU core: just tells that there's no cache data */
void Profile_CpuShowInstrMisses(int show)
{
	fputs("Cache information is recorded only with WinUAE CPU.\n", stderr);
}
/* Variant for builds without WinUAE CPU core: just tells that there's no cache data */
void Profile_CpuShowDataHits(int show)
{
	fputs("Cache information is recorded only with WinUAE CPU.\n", stderr);
}
529: #endif
530:
531:
532: /**
533: * compare function for qsort() to sort CPU profile data by cycles counts.
534: */
535: static int cmp_cpu_cycles(const void *p1, const void *p2)
536: {
537: Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].cycles;
538: Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].cycles;
539: if (count1 > count2) {
540: return -1;
541: }
542: if (count1 < count2) {
543: return 1;
544: }
545: return 0;
546: }
547:
548: /**
549: * Sort CPU profile data addresses by cycle counts and show the results.
550: */
551: void Profile_CpuShowCycles(int show)
552: {
553: int active;
554: int oldcols[DISASM_COLUMNS];
555: Uint32 *sort_arr, *end, addr, nextpc;
556: cpu_profile_item_t *data = cpu_profile.data;
557: float percentage;
558: Uint32 count;
559:
560: if (!data) {
561: fprintf(stderr, "ERROR: no CPU profiling data available!\n");
562: return;
563: }
564:
565: active = cpu_profile.active;
566: sort_arr = cpu_profile.sort_arr;
567: qsort(sort_arr, active, sizeof(*sort_arr), cmp_cpu_cycles);
568:
569: leave_instruction_column(oldcols);
570:
571: printf("addr:\t\tcycles:\n");
572: show = (show < active ? show : active);
573: for (end = sort_arr + show; sort_arr < end; sort_arr++) {
574: addr = index2address(*sort_arr);
575: count = data[*sort_arr].cycles;
576: percentage = 100.0*count/cpu_profile.all.cycles;
577: printf("0x%06x\t%5.2f%%\t%d%s\t", addr, percentage, count,
578: count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
579: Disasm(stdout, addr, &nextpc, 1);
580: }
581: printf("%d CPU addresses listed.\n", show);
582:
583: Disasm_SetColumns(oldcols);
584: }
585:
586: /**
587: * compare function for qsort() to sort CPU profile data by descending
588: * address access counts.
589: */
590: static int cmp_cpu_count(const void *p1, const void *p2)
591: {
592: Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].count;
593: Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].count;
594: if (count1 > count2) {
595: return -1;
596: }
597: if (count1 < count2) {
598: return 1;
599: }
600: return 0;
601: }
602:
603: /**
604: * Sort CPU profile data addresses by call counts and show the results.
605: * If symbols are requested and symbols are loaded, show (only) addresses
606: * matching a symbol.
607: */
608: void Profile_CpuShowCounts(int show, bool only_symbols)
609: {
610: cpu_profile_item_t *data = cpu_profile.data;
611: int symbols, matched, active;
612: int oldcols[DISASM_COLUMNS];
613: Uint32 *sort_arr, *end, addr, nextpc;
614: const char *name;
615: float percentage;
616: Uint32 count;
617:
618: if (!data) {
619: fprintf(stderr, "ERROR: no CPU profiling data available!\n");
620: return;
621: }
622: active = cpu_profile.active;
623: show = (show < active ? show : active);
624:
625: sort_arr = cpu_profile.sort_arr;
626: qsort(sort_arr, active, sizeof(*sort_arr), cmp_cpu_count);
627:
628: if (!only_symbols) {
629: leave_instruction_column(oldcols);
630: printf("addr:\t\tcount:\n");
631: for (end = sort_arr + show; sort_arr < end; sort_arr++) {
632: addr = index2address(*sort_arr);
633: count = data[*sort_arr].count;
634: percentage = 100.0*count/cpu_profile.all.count;
635: printf("0x%06x\t%5.2f%%\t%d%s\t",
636: addr, percentage, count,
637: count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
638: Disasm(stdout, addr, &nextpc, 1);
639: }
640: printf("%d CPU addresses listed.\n", show);
641: Disasm_SetColumns(oldcols);
642: return;
643: }
644:
645: symbols = Symbols_CpuCount();
646: if (!symbols) {
647: fprintf(stderr, "ERROR: no CPU symbols loaded!\n");
648: return;
649: }
650: matched = 0;
651:
652: leave_instruction_column(oldcols);
653:
654: printf("addr:\t\tcount:\t\tsymbol:\n");
655: for (end = sort_arr + active; sort_arr < end; sort_arr++) {
656:
657: addr = index2address(*sort_arr);
658: name = Symbols_GetByCpuAddress(addr);
659: if (!name) {
660: continue;
661: }
662: count = data[*sort_arr].count;
663: percentage = 100.0*count/cpu_profile.all.count;
664: printf("0x%06x\t%5.2f%%\t%d\t%s%s\t",
665: addr, percentage, count, name,
666: count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
667: Disasm(stdout, addr, &nextpc, 1);
668:
669: matched++;
670: if (matched >= show || matched >= symbols) {
671: break;
672: }
673: }
674: printf("%d CPU symbols listed.\n", matched);
675:
676: Disasm_SetColumns(oldcols);
677: }
678:
679:
680: static const char * addr2name(Uint32 addr, Uint64 *total)
681: {
682: Uint32 idx = address2index(addr);
683: *total = cpu_profile.data[idx].count;
684: return Symbols_GetByCpuAddress(addr);
685: }
686:
687: /**
688: * Output CPU callers info to given file.
689: */
690: void Profile_CpuShowCallers(FILE *fp)
691: {
692: Profile_ShowCallers(fp, cpu_callinfo.sites, cpu_callinfo.site, addr2name);
693: }
694:
695: /**
696: * Save CPU profile information to given file.
697: */
698: void Profile_CpuSave(FILE *out)
699: {
1.1.1.3 root 700: Uint32 text, end;
701: fputs("Field names:\tExecuted instructions, Used cycles, Instruction cache misses, Data cache hits\n", out);
702: /* (Python) regexp that matches address and all described fields from disassembly:
703: * $<hex> : <ASM> <percentage>% (<count>, <cycles>, <i-misses>, <d-hits>)
704: * $e5af38 : rts 0.00% (12, 0, 12, 0)
1.1 root 705: */
706: fputs("Field regexp:\t^\\$([0-9a-f]+) :.*% \\((.*)\\)$\n", out);
707: /* some information for interpreting the addresses */
1.1.1.3 root 708: fprintf(out, "ST_RAM:\t\t0x%06x-0x%06x\n", 0, STRamEnd);
709: end = TosAddress + TosSize;
710: fprintf(out, "ROM_TOS:\t0x%06x-0x%06x\n", TosAddress, end);
711: fprintf(out, "CARTRIDGE:\t0x%06x-0x%06x\n", CART_START, CART_END);
1.1 root 712: text = DebugInfo_GetTEXT();
1.1.1.3 root 713: if (text && (text < TosAddress || text >= TTRAM_START)) {
1.1 root 714: fprintf(out, "PROGRAM_TEXT:\t0x%06x-0x%06x\n", text, DebugInfo_GetTEXTEnd());
715: }
1.1.1.3 root 716: if (TTmemory && ConfigureParams.Memory.nTTRamSize) {
717: end = TTRAM_START + 1024*1024*ConfigureParams.Memory.nTTRamSize;
718: fprintf(out, "TT_RAM:\t\t0x%08x-0x%08x\n", TTRAM_START, end);
719: } else if (end < CART_END) {
720: end = CART_END;
721: }
722: Profile_CpuShowAddresses(0, end-2, out);
1.1 root 723: Profile_CpuShowCallers(out);
724: }
725:
726: /* ------------------ CPU profile control ----------------- */
727:
728: /**
729: * Initialize CPU profiling when necessary. Return true if profiling.
730: */
731: bool Profile_CpuStart(void)
732: {
733: int size;
734:
735: Profile_FreeCallinfo(&(cpu_callinfo));
736: if (cpu_profile.sort_arr) {
737: /* remove previous results */
738: free(cpu_profile.sort_arr);
739: free(cpu_profile.data);
740: cpu_profile.sort_arr = NULL;
741: cpu_profile.data = NULL;
742: printf("Freed previous CPU profile buffers.\n");
743: }
744: if (!cpu_profile.enabled) {
745: return false;
746: }
747: /* zero everything */
748: memset(&cpu_profile, 0, sizeof(cpu_profile));
749:
750: /* Shouldn't change within same debug session */
1.1.1.3 root 751: size = (STRamEnd + CART_SIZE + TosSize) / 2;
752: if (TTmemory && ConfigureParams.Memory.nTTRamSize) {
753: size += ConfigureParams.Memory.nTTRamSize * 1024*1024/2;
754: }
1.1 root 755:
756: /* Add one entry for catching invalid PC values */
757: cpu_profile.data = calloc(size + 1, sizeof(*cpu_profile.data));
758: if (!cpu_profile.data) {
759: perror("ERROR, new CPU profile buffer alloc failed");
760: return false;
761: }
762: printf("Allocated CPU profile buffer (%d MB).\n",
763: (int)sizeof(*cpu_profile.data)*size/(1024*1024));
764: cpu_profile.size = size;
765:
766: Profile_AllocCallinfo(&(cpu_callinfo), Symbols_CpuCount(), "CPU");
767:
768: /* special hack for EmuTOS */
769: etos_switcher = PC_UNDEFINED;
770: if (cpu_callinfo.sites && bIsEmuTOS &&
771: (!Symbols_GetCpuAddress(SYMTYPE_TEXT, "_switchto", &etos_switcher) || etos_switcher < TosAddress)) {
772: etos_switcher = PC_UNDEFINED;
773: }
774:
1.1.1.3 root 775: /* reset cache stats (CPU emulation doesn't do that) */
776: CpuInstruction.D_Cache_hit = 0;
777: CpuInstruction.I_Cache_hit = 0;
778: CpuInstruction.I_Cache_miss = 0;
779: CpuInstruction.D_Cache_miss = 0;
1.1 root 780:
1.1.1.3 root 781: cpu_profile.prev_cycles = CyclesGlobalClockCounter;
782: cpu_profile.prev_family = OpcodeFamily;
783: cpu_profile.prev_pc = M68000_GetPC();
784: if (ConfigureParams.System.bAddressSpace24) {
785: cpu_profile.prev_pc &= 0xffffff;
786: }
1.1.1.2 root 787: cpu_profile.loop_start = PC_UNDEFINED;
788: cpu_profile.loop_end = PC_UNDEFINED;
789: cpu_profile.loop_count = 0;
790: Profile_LoopReset();
791:
1.1 root 792: cpu_profile.disasm_addr = 0;
793: cpu_profile.processed = false;
794: cpu_profile.enabled = true;
795: return cpu_profile.enabled;
796: }
797:
798: /**
799: * return true if pc could be next instruction for previous pc
800: */
801: static bool is_prev_instr(Uint32 prev_pc, Uint32 pc)
802: {
803: /* just moved to next instruction (1-2 words)? */
804: if (prev_pc < pc && (pc - prev_pc) <= 10) {
805: return true;
806: }
807: return false;
808: }
809:
810: /**
811: * return caller instruction type classification
812: */
813: static calltype_t cpu_opcode_type(int family, Uint32 prev_pc, Uint32 pc)
814: {
815: switch (family) {
816:
817: case i_JSR:
818: case i_BSR:
819: return CALL_SUBROUTINE;
820:
821: case i_RTS:
822: case i_RTR:
823: case i_RTD:
824: return CALL_SUBRETURN;
825:
826: case i_JMP: /* often used also for "inlined" function calls... */
827: case i_Bcc: /* both BRA & BCC */
828: case i_FBcc:
829: case i_DBcc:
830: case i_FDBcc:
831: return CALL_BRANCH;
832:
833: case i_TRAP:
834: case i_TRAPV:
835: case i_TRAPcc:
836: case i_FTRAPcc:
837: case i_STOP:
838: case i_ILLG:
839: case i_CHK:
840: case i_CHK2:
841: case i_BKPT:
842: return CALL_EXCEPTION;
843:
844: case i_RTE:
845: return CALL_EXCRETURN;
846: }
847: /* just moved to next instruction? */
848: if (is_prev_instr(prev_pc, pc)) {
849: return CALL_NEXT;
850: }
851: return CALL_UNKNOWN;
852: }
853:
854: /**
855: * If call tracking is enabled (there are symbols), collect
856: * information about subroutine and other calls, and their costs.
857: *
858: * Like with profile data, caller info checks need to be for previous
859: * instruction, that's why "pc" argument for this function actually
860: * needs to be previous PC.
861: */
862: static void collect_calls(Uint32 pc, counters_t *counters)
863: {
864: calltype_t flag;
865: int idx, family;
866: Uint32 prev_pc, caller_pc;
867:
868: family = cpu_profile.prev_family;
869: cpu_profile.prev_family = OpcodeFamily;
870:
871: prev_pc = cpu_callinfo.prev_pc;
872: cpu_callinfo.prev_pc = pc;
873: caller_pc = PC_UNDEFINED;
874:
875: /* address is return address for last subroutine call? */
876: if (unlikely(pc == cpu_callinfo.return_pc) && likely(cpu_callinfo.depth)) {
877:
878: flag = cpu_opcode_type(family, prev_pc, pc);
1.1.1.2 root 879: /* previous address can be exception return (e.g. RTE) instead of RTS,
880: * if exception occurred right after returning from subroutine call.
1.1 root 881: */
882: if (likely(flag == CALL_SUBRETURN || flag == CALL_EXCRETURN)) {
883: caller_pc = Profile_CallEnd(&cpu_callinfo, counters);
884: } else {
885: #if DEBUG
886: /* although at return address, it didn't return yet,
887: * e.g. because there was a jsr or jump to return address
888: */
889: Uint32 nextpc;
1.1.1.2 root 890: fprintf(stderr, "WARNING: subroutine call returned 0x%x -> 0x%x, not through RTS!\n", prev_pc, pc);
1.1 root 891: Disasm(stderr, prev_pc, &nextpc, 1);
892: #endif
893: }
1.1.1.2 root 894: /* next address might be another symbol, so need to fall through */
1.1 root 895: }
896:
897: /* address is one which we're tracking? */
898: idx = Symbols_GetCpuAddressIndex(pc);
899: if (unlikely(idx >= 0)) {
900:
901: flag = cpu_opcode_type(family, prev_pc, pc);
1.1.1.2 root 902: if (flag == CALL_SUBROUTINE || flag == CALL_EXCEPTION) {
1.1 root 903: /* special HACK for for EmuTOS AES switcher which
904: * changes stack content to remove itself from call
905: * stack and uses RTS for subroutine *calls*, not
906: * for returning from them.
907: *
908: * It wouldn't be reliable to detect calls from it,
909: * so I'm making call *to* it show up as branch, to
910: * keep callstack depth correct.
911: */
912: if (unlikely(pc == etos_switcher)) {
913: flag = CALL_BRANCH;
914: } else if (unlikely(prev_pc == PC_UNDEFINED)) {
915: /* if first profiled instruction
916: * is subroutine call, it doesn't have
917: * valid prev_pc value stored
918: */
919: cpu_callinfo.return_pc = PC_UNDEFINED;
1.1.1.3 root 920: fprintf(stderr, "WARNING: previous PC for tracked address 0x%d is undefined!\n", pc);
1.1 root 921: #if DEBUG
922: skip_assert = true;
923: DebugUI(REASON_CPU_EXCEPTION);
924: #endif
925: } else {
926: /* slow! */
927: cpu_callinfo.return_pc = Disasm_GetNextPC(prev_pc);
928: }
929: } else if (caller_pc != PC_UNDEFINED) {
1.1.1.2 root 930: /* returned from function to first instruction of another symbol:
1.1 root 931: * 0xf384 jsr some_function
932: * other_symbol:
933: * 0f3x8a some_instruction
934: * -> change return instruction address to
935: * address of what did the returned call.
936: */
937: prev_pc = caller_pc;
938: assert(is_prev_instr(prev_pc, pc));
939: flag = CALL_NEXT;
940: }
941: Profile_CallStart(idx, &cpu_callinfo, prev_pc, flag, pc, counters);
942: }
943: }
944:
945: /**
1.1.1.2 root 946: * log last loop info, if there's suitable data for one
947: */
948: static void log_last_loop(void)
949: {
950: unsigned len = cpu_profile.loop_end - cpu_profile.loop_start;
951: if (cpu_profile.loop_count > 1 && (len < profile_loop.cpu_limit || !profile_loop.cpu_limit)) {
952: fprintf(profile_loop.fp, "CPU %d 0x%06x %d %d\n", nVBLs,
953: cpu_profile.loop_start, len, cpu_profile.loop_count);
954: }
955: }
956:
# if DEBUG || ENABLE_WINUAE_CPU
/**
 * Warn about a value that went out of its expected range.
 *
 * Prints the offending value, the largest accepted value (limit - 1)
 * and the given name to stderr, followed by disassembly of the
 * instructions at 'prev_pc' and 'pc'.  With DEBUG enabled, also marks
 * the final consistency asserts to be skipped and invokes the debugger.
 *
 * Returns the largest accepted value (limit - 1), which caller can
 * use in place of the too large one.
 */
static Uint32 warn_too_large(const char *name, const int value, const int limit, const Uint32 prev_pc, const Uint32 pc)
{
	const int max_valid = limit - 1;
	Uint32 next_addr;	/* required by Disasm(), value is not used */

	fprintf(stderr, "WARNING: unexpected (%d > %d) %s at 0x%x:\n", value, max_valid, name, pc);
	Disasm(stderr, prev_pc, &next_addr, 1);
	Disasm(stderr, pc, &next_addr, 1);
#if DEBUG
	skip_assert = true;
	DebugUI(REASON_CPU_EXCEPTION);
#endif
	return max_valid;
}
#endif
974:
1.1.1.2 root 975: /**
1.1 root 976: * Update CPU cycle and count statistics for PC address.
977: *
978: * This gets called after instruction has executed and PC
979: * has advanced to next instruction.
980: */
981: void Profile_CpuUpdate(void)
982: {
983: counters_t *counters = &(cpu_profile.all);
1.1.1.3 root 984: Uint32 pc, prev_pc, idx, cycles;
1.1 root 985: cpu_profile_item_t *prev;
1.1.1.3 root 986: #if ENABLE_WINUAE_CPU
987: Uint32 i_hits, d_hits, i_misses, d_misses;
988: #else
989: const Uint32 i_misses = 0, d_hits = 0;
990: #endif
1.1 root 991:
992: prev_pc = cpu_profile.prev_pc;
1.1.1.3 root 993: /* PC may have extra bits when using 24 bit addressing, they need to be masked away as
1.1 root 994: * emulation itself does that too when PC value is used
995: */
1.1.1.3 root 996: cpu_profile.prev_pc = pc = M68000_GetPC();
997: if (ConfigureParams.System.bAddressSpace24) {
998: cpu_profile.prev_pc &= 0xffffff;
999: }
1.1.1.2 root 1000: if (unlikely(profile_loop.fp)) {
1001: if (pc < prev_pc) {
1002: if (pc == cpu_profile.loop_start && prev_pc == cpu_profile.loop_end) {
1003: cpu_profile.loop_count++;
1004: } else {
1005: cpu_profile.loop_start = pc;
1006: cpu_profile.loop_end = prev_pc;
1007: cpu_profile.loop_count = 1;
1008: }
1009: } else {
1010: if (pc > cpu_profile.loop_end) {
1011: log_last_loop();
1.1.1.3 root 1012: cpu_profile.loop_end = 0xffffffff;
1.1.1.2 root 1013: cpu_profile.loop_count = 0;
1014: }
1015: }
1016: }
1017:
1.1 root 1018: idx = address2index(prev_pc);
1019: assert(idx <= cpu_profile.size);
1020: prev = cpu_profile.data + idx;
1021:
1022: if (likely(prev->count < MAX_CPU_PROFILE_VALUE)) {
1023: prev->count++;
1024: }
1025:
1.1.1.3 root 1026: cycles = CyclesGlobalClockCounter - cpu_profile.prev_cycles;
1027: cpu_profile.prev_cycles = CyclesGlobalClockCounter;
1.1 root 1028:
1029: if (likely(prev->cycles < MAX_CPU_PROFILE_VALUE - cycles)) {
1030: prev->cycles += cycles;
1031: } else {
1032: prev->cycles = MAX_CPU_PROFILE_VALUE;
1033: }
1034:
1035: #if ENABLE_WINUAE_CPU
1.1.1.3 root 1036: /* only WinUAE CPU core provides cache information */
1037: i_hits = CpuInstruction.I_Cache_hit;
1038: d_hits = CpuInstruction.D_Cache_hit;
1039: i_misses = CpuInstruction.I_Cache_miss;
1040: d_misses = CpuInstruction.D_Cache_miss;
1041:
1042: /* reset cache stats after reading them (for the next instruction) */
1043: CpuInstruction.I_Cache_hit = 0;
1044: CpuInstruction.D_Cache_hit = 0;
1045: CpuInstruction.I_Cache_miss = 0;
1046: CpuInstruction.D_Cache_miss = 0;
1047:
1048: /* tracked for every address */
1049: if (likely(prev->i_misses < MAX_CPU_PROFILE_VALUE - i_misses)) {
1050: prev->i_misses += i_misses;
1.1 root 1051: } else {
1.1.1.3 root 1052: prev->i_misses = MAX_CPU_PROFILE_VALUE;
1.1 root 1053: }
1.1.1.3 root 1054: if (likely(prev->d_hits < MAX_CPU_PROFILE_VALUE - d_hits)) {
1055: prev->d_hits += d_hits;
1056: } else {
1057: prev->d_hits = MAX_CPU_PROFILE_VALUE;
1058: }
1059:
1060: /* tracking for histogram, check for array overflows */
1061: if (unlikely(i_hits >= MAX_I_HITS)) {
1062: i_hits = warn_too_large("number of CPU instruction cache hits", i_hits, MAX_I_HITS, prev_pc, pc);
1063: }
1064: cpu_profile.i_hit_counts[i_hits]++;
1065:
1066: if (unlikely(i_misses >= MAX_I_MISSES)) {
1067: i_misses = warn_too_large("number of CPU instruction cache misses", i_misses, MAX_I_MISSES, prev_pc, pc);
1068: }
1069: cpu_profile.i_miss_counts[i_misses]++;
1070:
1071: if (unlikely(d_hits >= MAX_D_HITS)) {
1072: d_hits = warn_too_large("number of CPU data cache hits", d_hits, MAX_D_HITS, prev_pc, pc);
1073: }
1074: cpu_profile.d_hit_counts[d_hits]++;
1075:
1076: if (unlikely(d_misses >= MAX_D_MISSES)) {
1077: d_misses = warn_too_large("number of CPU data cache misses", d_misses, MAX_D_MISSES, prev_pc, pc);
1078: }
1079: cpu_profile.d_miss_counts[d_misses]++;
1.1 root 1080: #endif
1.1.1.3 root 1081:
1.1 root 1082: if (cpu_callinfo.sites) {
1083: collect_calls(prev_pc, counters);
1084: }
1085: /* counters are increased after caller info is processed,
1086: * otherwise cost for the instruction calling the callee
1087: * doesn't get accounted to caller (but callee).
1088: */
1089: counters->count++;
1.1.1.3 root 1090: counters->cycles += cycles;
1091: counters->i_misses += i_misses;
1092: counters->d_hits += d_hits;
1.1 root 1093:
1094: #if DEBUG
1095: if (unlikely(OpcodeFamily == 0)) {
1096: Uint32 nextpc;
1097: fputs("WARNING: instruction opcode family is zero (=i_ILLG) for instruction:\n", stderr);
1098: Disasm(stderr, prev_pc, &nextpc, 1);
1099: }
1100: /* catch too large (and negative) cycles for other than STOP instruction */
1101: if (unlikely(cycles > 512 && OpcodeFamily != i_STOP)) {
1.1.1.3 root 1102: warn_too_large("cycles", cycles, 512, prev_pc, pc);
1.1 root 1103: }
1.1.1.3 root 1104: # if !ENABLE_WINUAE_CPU
1105: {
1106: static Uint32 prev_cycles = 0, prev_pc2 = 0;
1107: if (unlikely(cycles == 0 && prev_cycles == 0)) {
1108: Uint32 nextpc;
1109: fputs("WARNING: Zero cycles for successive opcodes:\n", stderr);
1110: Disasm(stderr, prev_pc2, &nextpc, 1);
1111: Disasm(stderr, prev_pc, &nextpc, 1);
1112: }
1113: prev_cycles = cycles;
1114: prev_pc2 = prev_pc;
1.1 root 1115: }
1.1.1.3 root 1116: # endif
1.1 root 1117: #endif
1118: }
1119:
1120:
1121: /**
1122: * Helper for accounting CPU profile area item.
1123: */
1124: static void update_area_item(profile_area_t *area, Uint32 addr, cpu_profile_item_t *item)
1125: {
1126: Uint32 cycles = item->cycles;
1127: Uint32 count = item->count;
1128:
1129: if (!count) {
1130: return;
1131: }
1132: area->counters.count += count;
1133: area->counters.cycles += cycles;
1.1.1.3 root 1134: area->counters.i_misses += item->i_misses;
1135: area->counters.d_hits += item->d_hits;
1.1 root 1136:
1137: if (cycles == MAX_CPU_PROFILE_VALUE) {
1138: area->overflow = true;
1139: }
1140: if (addr < area->lowest) {
1141: area->lowest = addr;
1142: }
1143: area->highest = addr;
1144:
1145: area->active++;
1146: }
1147:
1148: /**
1149: * Helper for collecting CPU profile area statistics.
1150: */
1151: static Uint32 update_area(profile_area_t *area, Uint32 start, Uint32 end)
1152: {
1153: cpu_profile_item_t *item;
1154: Uint32 addr;
1155:
1156: memset(area, 0, sizeof(profile_area_t));
1157: area->lowest = cpu_profile.size;
1158:
1159: item = &(cpu_profile.data[start]);
1160: for (addr = start; addr < end; addr++, item++) {
1161: update_area_item(area, addr, item);
1162: }
1163: return addr;
1164: }
1165:
1166: /**
1167: * Helper for initializing CPU profile area sorting indexes.
1168: */
1169: static Uint32* index_area(profile_area_t *area, Uint32 *sort_arr)
1170: {
1171: cpu_profile_item_t *item;
1172: Uint32 addr;
1173:
1174: item = &(cpu_profile.data[area->lowest]);
1175: for (addr = area->lowest; addr <= area->highest; addr++, item++) {
1176: if (item->count) {
1177: *sort_arr++ = addr;
1178: }
1179: }
1180: return sort_arr;
1181: }
1182:
1183: /**
1184: * Stop and process the CPU profiling data; collect stats and
1185: * prepare for more optimal sorting.
1186: */
1187: void Profile_CpuStop(void)
1188: {
1189: Uint32 *sort_arr, next;
1.1.1.3 root 1190: unsigned int size, stsize;
1.1 root 1191: int active;
1192:
1193: if (cpu_profile.processed || !cpu_profile.enabled) {
1194: return;
1195: }
1.1.1.2 root 1196:
1197: log_last_loop();
1198: if (profile_loop.fp) {
1199: fflush(profile_loop.fp);
1200: }
1201:
1.1 root 1202: /* user didn't change RAM or TOS size in the meanwhile? */
1.1.1.3 root 1203: size = stsize = (STRamEnd + CART_SIZE + TosSize) / 2;
1204: if (TTmemory && ConfigureParams.Memory.nTTRamSize) {
1205: size += ConfigureParams.Memory.nTTRamSize * 1024*1024/2;
1206: }
1207: assert(cpu_profile.size == size);
1.1 root 1208:
1209: Profile_FinalizeCalls(&(cpu_callinfo), &(cpu_profile.all), Symbols_GetByCpuAddress);
1210:
1211: /* find lowest and highest addresses executed etc */
1212: next = update_area(&cpu_profile.ram, 0, STRamEnd/2);
1.1.1.3 root 1213: if (TosAddress < CART_START) {
1214: next = update_area(&cpu_profile.tos, next, (STRamEnd + TosSize)/2);
1215: next = update_area(&cpu_profile.rom, next, stsize);
1216: } else {
1217: next = update_area(&cpu_profile.rom, next, (STRamEnd + CART_SIZE)/2);
1218: next = update_area(&cpu_profile.tos, next, stsize);
1219: }
1220: next = update_area(&cpu_profile.ttram, next, size);
1221: assert(next == size);
1.1 root 1222:
1223: #if DEBUG
1224: if (skip_assert) {
1225: skip_assert = false;
1226: } else
1227: #endif
1228: {
1.1.1.3 root 1229: #if DEBUG
1230: if (cpu_profile.all.count != cpu_profile.ttram.counters.count + cpu_profile.ram.counters.count + cpu_profile.tos.counters.count + cpu_profile.rom.counters.count) {
1231: fprintf(stderr, "ERROR, instruction count mismatch:\n\t%"PRIu64" != %"PRIu64" + %"PRIu64" + %"PRIu64" + %"PRIu64"?\n",
1232: cpu_profile.all.count, cpu_profile.ttram.counters.count, cpu_profile.ram.counters.count,
1233: cpu_profile.tos.counters.count, cpu_profile.rom.counters.count);
1234: fprintf(stderr, "If there was debugger invocation from profiling before this, try with profiler DEBUG define disabled!!!\n");
1235: }
1236: #endif
1237: assert(cpu_profile.all.count == cpu_profile.ttram.counters.count + cpu_profile.ram.counters.count + cpu_profile.tos.counters.count + cpu_profile.rom.counters.count);
1238: assert(cpu_profile.all.cycles == cpu_profile.ttram.counters.cycles + cpu_profile.ram.counters.cycles + cpu_profile.tos.counters.cycles + cpu_profile.rom.counters.cycles);
1239: assert(cpu_profile.all.i_misses == cpu_profile.ttram.counters.i_misses + cpu_profile.ram.counters.i_misses + cpu_profile.tos.counters.i_misses + cpu_profile.rom.counters.i_misses);
1240: assert(cpu_profile.all.d_hits == cpu_profile.ttram.counters.d_hits + cpu_profile.ram.counters.d_hits + cpu_profile.tos.counters.d_hits + cpu_profile.rom.counters.d_hits);
1.1 root 1241: }
1242:
1243: /* allocate address array for sorting */
1.1.1.3 root 1244: active = cpu_profile.ttram.active + cpu_profile.ram.active + cpu_profile.rom.active + cpu_profile.tos.active;
1.1 root 1245: sort_arr = calloc(active, sizeof(*sort_arr));
1246:
1247: if (!sort_arr) {
1248: perror("ERROR: allocating CPU profile address data");
1249: free(cpu_profile.data);
1250: cpu_profile.data = NULL;
1251: return;
1252: }
1253: printf("Allocated CPU profile address buffer (%d KB).\n",
1254: (int)sizeof(*sort_arr)*(active+512)/1024);
1255: cpu_profile.sort_arr = sort_arr;
1256: cpu_profile.active = active;
1257:
1258: /* and fill addresses for used instructions... */
1259: sort_arr = index_area(&cpu_profile.ram, sort_arr);
1260: sort_arr = index_area(&cpu_profile.tos, sort_arr);
1261: sort_arr = index_area(&cpu_profile.rom, sort_arr);
1.1.1.3 root 1262: sort_arr = index_area(&cpu_profile.ttram, sort_arr);
1.1 root 1263: assert(sort_arr == cpu_profile.sort_arr + cpu_profile.active);
1264: //printf("%d/%d/%d\n", area->active, sort_arr-cpu_profile.sort_arr, active);
1265:
1266: Profile_CpuShowStats();
1267: cpu_profile.processed = true;
1268: }
1269:
1270: /**
1271: * Get pointers to CPU profile enabling and disasm address variables
1272: * for updating them (in parser).
1273: */
1274: void Profile_CpuGetPointers(bool **enabled, Uint32 **disasm_addr)
1275: {
1276: *disasm_addr = &cpu_profile.disasm_addr;
1277: *enabled = &cpu_profile.enabled;
1278: }
1279:
1280: /**
1281: * Get callinfo & symbol search pointers for stack walking.
1282: */
1283: void Profile_CpuGetCallinfo(callinfo_t **callinfo, const char* (**get_symbol)(Uint32))
1284: {
1285: *callinfo = &(cpu_callinfo);
1286: *get_symbol = Symbols_GetByCpuAddress;
1287: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.