|
|
1.1 root 1: /*
2: * Hatari - profilecpu.c
3: *
4: * Copyright (C) 2010-2013 by Eero Tamminen
5: *
6: * This file is distributed under the GNU General Public License, version 2
7: * or at your option any later version. Read the file gpl.txt for details.
8: *
9: * profilecpu.c - functions for profiling CPU and showing the results.
10: */
11: const char Profilecpu_fileid[] = "Hatari profilecpu.c : " __DATE__ " " __TIME__;
12:
13: #include <stdio.h>
14: #include <inttypes.h>
15: #include <assert.h>
16: #include "main.h"
17: #include "configuration.h"
18: #include "clocks_timings.h"
19: #include "debugInfo.h"
20: #include "dsp.h"
21: #include "m68000.h"
22: #include "68kDisass.h"
23: #include "profile.h"
24: #include "profile_priv.h"
25: #include "stMemory.h"
26: #include "symbols.h"
27: #include "tos.h"
28:
29: /* If non-zero, output (more) warnings on suspicious:
30: * - cycle/instruction counts
31: * - PC switches
32: * and drop to the debugger on invalid PC addresses.
33: */
34: #define DEBUG 0
35: #if DEBUG
36: #include "debugui.h"
/* Set whenever the debugger was invoked because of a bad PC value;
 * Profile_CpuStop() then skips its counter consistency asserts once
 * and clears the flag.
 */
37: static bool skip_assert;
38: #endif
39:
/* Caller/callee tracking state handed to the Profile_Call*() helpers */
40: static callinfo_t cpu_callinfo;
41:
42: /* This is relevant with the WinUAE CPU core:
43: * - the default cycle exact variant needs this define to be non-zero
44: * - non-cycle exact and MMU variants need this define to be 0
45: * for cycle counts to make any sense
46: */
47: #define USE_CYCLES_COUNTER 1
48:
/* Per-address counters saturate at this value instead of wrapping;
 * the result listings flag such values as "(OVERFLOW)".
 */
49: #define MAX_CPU_PROFILE_VALUE 0xFFFFFFFF
50:
/* Profile data collected for a single (even) instruction address */
51: typedef struct {
52: Uint32 count; /* how many times the instruction at this address was executed */
53: Uint32 cycles; /* how many CPU cycles were spent at this address */
54: Uint32 misses; /* how many CPU cache misses happened at this address */
55: } cpu_profile_item_t;
56:
/* Number of miss_counts[] slots; Profile_CpuUpdate() asserts that
 * the cache misses of one instruction stay below this.
 */
57: #define MAX_MISS 4
58:
/* All CPU profiling state of this file */
59: static struct {
60: counters_t all; /* total counts for all areas */
61: Uint32 miss_counts[MAX_MISS]; /* how many instructions had 0, 1, ... cache misses */
62: cpu_profile_item_t *data; /* profile data items, indexed with address2index() */
63: Uint32 size; /* number of profile data items (one extra is allocated for invalid PCs) */
64: profile_area_t ram; /* normal RAM stats */
65: profile_area_t rom; /* cartridge ROM stats */
66: profile_area_t tos; /* ROM TOS stats */
67: int active; /* number of active data items in all areas, set by Profile_CpuStop() */
68: Uint32 *sort_arr; /* indexes of the active data items, reordered by the sorted listings */
69: Uint32 prev_cycles; /* cycles counter value at the previous update */
70: Uint32 prev_pc; /* previous instruction address */
71: int prev_family; /* previous instruction opcode family */
72: Uint32 disasm_addr; /* 'addresses' command start address */
73: bool processed; /* true when data is already processed */
74: bool enabled; /* true when profiling enabled */
75: } cpu_profile;
76:
77: /* Special hack for EmuTOS: address of its "_switchto" symbol, or
 * PC_UNDEFINED. collect_calls() records calls to it as branches.
 */
78: static Uint32 etos_switcher;
79:
80:
81: /* ------------------ CPU profile address mapping ----------------- */
82:
83: /**
84: * convert Atari memory address to sorting array profile data index.
85: */
86: static inline Uint32 address2index(Uint32 pc)
87: {
88: if (unlikely(pc & 1)) {
89: fprintf(stderr, "WARNING: odd CPU profile instruction address 0x%x!\n", pc);
90: #if DEBUG
91: skip_assert = true;
92: DebugUI(REASON_CPU_EXCEPTION);
93: #endif
94: }
95: if (pc >= TosAddress && pc < TosAddress + TosSize) {
96: /* TOS, put it after RAM data */
97: pc = pc - TosAddress + STRamEnd;
98:
99: } else if (pc >= 0xFA0000 && pc < 0xFC0000) {
100: /* ROM, put it after RAM & TOS data */
101: pc = pc - 0xFA0000 + STRamEnd + TosSize;
102:
103: } else {
104: /* if in RAM, use as-is */
105: if (unlikely(pc >= STRamEnd)) {
106: fprintf(stderr, "WARNING: 'invalid' CPU PC profile instruction address 0x%x!\n", pc);
107: /* extra entry at end is reserved for invalid PC values */
108: pc = STRamEnd + TosSize + 0x20000;
109: #if DEBUG
110: skip_assert = true;
111: DebugUI(REASON_CPU_EXCEPTION);
112: #endif
113: }
114: }
115: /* CPU instructions are at even addresses, save space by halving */
116: return (pc >> 1);
117: }
118:
119: /**
120: * convert sorting array profile data index to Atari memory address.
121: */
122: static Uint32 index2address(Uint32 idx)
123: {
124: idx <<= 1;
125: /* RAM */
126: if (idx < STRamEnd) {
127: return idx;
128: }
129: /* TOS */
130: idx -= STRamEnd;
131: if (idx < TosSize) {
132: return idx + TosAddress;
133: }
134: /* ROM */
135: return idx - TosSize + 0xFA0000;
136: }
137:
138: /* ------------------ CPU profile results ----------------- */
139:
140: /**
141: * Get CPU cycles, count and count percentage for given address.
142: * Return true if data was available and non-zero, false otherwise.
143: */
144: bool Profile_CpuAddressData(Uint32 addr, float *percentage, Uint32 *count, Uint32 *cycles, Uint32 *misses)
145: {
146: Uint32 idx;
147: if (!cpu_profile.data) {
148: return false;
149: }
150: idx = address2index(addr);
151: *misses = cpu_profile.data[idx].misses;
152: *cycles = cpu_profile.data[idx].cycles;
153: *count = cpu_profile.data[idx].count;
154: if (cpu_profile.all.count) {
155: *percentage = 100.0*(*count)/cpu_profile.all.count;
156: } else {
157: *percentage = 0.0;
158: }
159: return (*count > 0);
160: }
161:
162: /**
163: * Helper to show statistics for specified CPU profile area.
164: */
165: static void show_cpu_area_stats(profile_area_t *area)
166: {
167: if (!area->active) {
168: fprintf(stderr, "- no activity\n");
169: return;
170: }
171: fprintf(stderr, "- active address range:\n 0x%06x-0x%06x\n",
172: index2address(area->lowest),
173: index2address(area->highest));
174: fprintf(stderr, "- active instruction addresses:\n %d (%.2f%% of all)\n",
175: area->active,
176: 100.0 * area->active / cpu_profile.active);
177: fprintf(stderr, "- executed instructions:\n %"PRIu64" (%.2f%% of all)\n",
178: area->counters.count,
179: 100.0 * area->counters.count / cpu_profile.all.count);
180: #if ENABLE_WINUAE_CPU
181: if (cpu_profile.all.misses) { /* CPU cache in use? */
182: fprintf(stderr, "- instruction cache misses:\n %"PRIu64" (%.2f%% of all)\n",
183: area->counters.misses,
184: 100.0 * area->counters.misses / cpu_profile.all.misses);
185: }
186: #endif
187: fprintf(stderr, "- used cycles:\n %"PRIu64" (%.2f%% of all)\n = %.5fs\n",
188: area->counters.cycles,
189: 100.0 * area->counters.cycles / cpu_profile.all.cycles,
190: (double)area->counters.cycles / MachineClocks.CPU_Freq);
191: if (area->overflow) {
192: fprintf(stderr, " *** COUNTER OVERFLOW! ***\n");
193: }
194: }
195:
196:
197: /**
198: * show CPU area (RAM, ROM, TOS) specific statistics.
199: */
200: void Profile_CpuShowStats(void)
201: {
202: fprintf(stderr, "Normal RAM (0-0x%X):\n", STRamEnd);
203: show_cpu_area_stats(&cpu_profile.ram);
204:
205: fprintf(stderr, "ROM TOS (0x%X-0x%X):\n", TosAddress, TosAddress + TosSize);
206: show_cpu_area_stats(&cpu_profile.tos);
207:
208: fprintf(stderr, "Cartridge ROM (0xFA0000-0xFC0000):\n");
209: show_cpu_area_stats(&cpu_profile.rom);
210:
211: fprintf(stderr, "\n= %.5fs\n",
212: (double)cpu_profile.all.cycles / MachineClocks.CPU_Freq);
213:
214: #if ENABLE_WINUAE_CPU
215: if (cpu_profile.all.misses) { /* CPU cache in use? */
216: int i;
217: fprintf(stderr, "\nCache misses per instruction, number of occurrences:\n");
218: for (i = 0; i < MAX_MISS; i++) {
219: fprintf(stderr, "- %d: %d\n", i, cpu_profile.miss_counts[i]);
220: }
221: }
222: #endif
223: }
224:
225: /**
226: * Show CPU instructions which execution was profiled, in the address order,
227: * starting from the given address. Return next disassembly address.
228: */
229: Uint32 Profile_CpuShowAddresses(Uint32 lower, Uint32 upper, FILE *out)
230: {
231: int oldcols[DISASM_COLUMNS], newcols[DISASM_COLUMNS];
232: int show, shown, active;
233: const char *symbol;
234: cpu_profile_item_t *data;
235: Uint32 idx, end, size;
236: uaecptr nextpc, addr;
237:
238: data = cpu_profile.data;
239: if (!data) {
240: fprintf(stderr, "ERROR: no CPU profiling data available!\n");
241: return 0;
242: }
243:
244: size = cpu_profile.size;
245: active = cpu_profile.active;
246: if (upper) {
247: end = address2index(upper);
248: show = active;
249: if (end > size) {
250: end = size;
251: }
252: } else {
253: end = size;
254: show = ConfigureParams.Debugger.nDisasmLines;
255: if (!show || show > active) {
256: show = active;
257: }
258: }
259:
260: /* get/change columns */
261: Disasm_GetColumns(oldcols);
262: Disasm_DisableColumn(DISASM_COLUMN_HEXDUMP, oldcols, newcols);
263: Disasm_SetColumns(newcols);
264:
265: fputs("# disassembly with profile data: <instructions percentage>% (<sum of instructions>, <sum of cycles>, <sum of i-cache misses>)\n", out);
266:
267: nextpc = 0;
268: idx = address2index(lower);
269: for (shown = 0; shown < show && idx < end; idx++) {
270: if (!data[idx].count) {
271: continue;
272: }
273: addr = index2address(idx);
274: if (addr != nextpc && nextpc) {
275: fprintf(out, "[...]\n");
276: }
277: symbol = Symbols_GetByCpuAddress(addr);
278: if (symbol) {
279: fprintf(out, "%s:\n", symbol);
280: }
281: /* NOTE: column setup works only with 68kDisass disasm engine! */
282: Disasm(out, addr, &nextpc, 1);
283: shown++;
284: }
285: printf("Disassembled %d (of active %d) CPU addresses.\n", shown, active);
286:
287: /* restore disassembly columns */
288: Disasm_SetColumns(oldcols);
289: return nextpc;
290: }
291:
292: /**
293: * remove all disassembly columns except instruction ones.
294: * data needed to restore columns is stored to "oldcols"
295: */
296: static void leave_instruction_column(int *oldcols)
297: {
298: int i, newcols[DISASM_COLUMNS];
299:
300: Disasm_GetColumns(oldcols);
301: for (i = 0; i < DISASM_COLUMNS; i++) {
302: if (i == DISASM_COLUMN_OPCODE || i == DISASM_COLUMN_OPERAND) {
303: continue;
304: }
305: Disasm_DisableColumn(i, oldcols, newcols);
306: oldcols = newcols;
307: }
308: Disasm_SetColumns(newcols);
309: }
310:
311: #if ENABLE_WINUAE_CPU
312: /**
313: * compare function for qsort() to sort CPU profile data by instruction cache misses.
314: */
315: static int cmp_cpu_misses(const void *p1, const void *p2)
316: {
317: Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].misses;
318: Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].misses;
319: if (count1 > count2) {
320: return -1;
321: }
322: if (count1 < count2) {
323: return 1;
324: }
325: return 0;
326: }
327:
328: /**
329: * Sort CPU profile data addresses by instruction cache misses and show the results.
330: */
331: void Profile_CpuShowMisses(int show)
332: {
333: int active;
334: int oldcols[DISASM_COLUMNS];
335: Uint32 *sort_arr, *end, addr, nextpc;
336: cpu_profile_item_t *data = cpu_profile.data;
337: float percentage;
338: Uint32 count;
339:
340: if (!cpu_profile.all.misses) {
341: fprintf(stderr, "No CPU cache miss information available.\n");
342: return;
343: }
344:
345: active = cpu_profile.active;
346: sort_arr = cpu_profile.sort_arr;
347: qsort(sort_arr, active, sizeof(*sort_arr), cmp_cpu_misses);
348:
349: leave_instruction_column(oldcols);
350:
351: printf("addr:\t\tmisses:\n");
352: show = (show < active ? show : active);
353: for (end = sort_arr + show; sort_arr < end; sort_arr++) {
354: addr = index2address(*sort_arr);
355: count = data[*sort_arr].misses;
356: percentage = 100.0*count/cpu_profile.all.misses;
357: printf("0x%06x\t%5.2f%%\t%d%s\t", addr, percentage, count,
358: count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
359: Disasm(stdout, addr, &nextpc, 1);
360: }
361: printf("%d CPU addresses listed.\n", show);
362:
363: Disasm_SetColumns(oldcols);
364: }
365: #else
/**
 * Cache miss listing stub for builds without the WinUAE CPU core,
 * only tells that the information isn't available.
 */
void Profile_CpuShowMisses(int show)
{
	fputs("Cache misses are recorded only with WinUAE CPU.\n", stderr);
}
369: #endif
370:
371:
372: /**
373: * compare function for qsort() to sort CPU profile data by cycles counts.
374: */
375: static int cmp_cpu_cycles(const void *p1, const void *p2)
376: {
377: Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].cycles;
378: Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].cycles;
379: if (count1 > count2) {
380: return -1;
381: }
382: if (count1 < count2) {
383: return 1;
384: }
385: return 0;
386: }
387:
388: /**
389: * Sort CPU profile data addresses by cycle counts and show the results.
390: */
391: void Profile_CpuShowCycles(int show)
392: {
393: int active;
394: int oldcols[DISASM_COLUMNS];
395: Uint32 *sort_arr, *end, addr, nextpc;
396: cpu_profile_item_t *data = cpu_profile.data;
397: float percentage;
398: Uint32 count;
399:
400: if (!data) {
401: fprintf(stderr, "ERROR: no CPU profiling data available!\n");
402: return;
403: }
404:
405: active = cpu_profile.active;
406: sort_arr = cpu_profile.sort_arr;
407: qsort(sort_arr, active, sizeof(*sort_arr), cmp_cpu_cycles);
408:
409: leave_instruction_column(oldcols);
410:
411: printf("addr:\t\tcycles:\n");
412: show = (show < active ? show : active);
413: for (end = sort_arr + show; sort_arr < end; sort_arr++) {
414: addr = index2address(*sort_arr);
415: count = data[*sort_arr].cycles;
416: percentage = 100.0*count/cpu_profile.all.cycles;
417: printf("0x%06x\t%5.2f%%\t%d%s\t", addr, percentage, count,
418: count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
419: Disasm(stdout, addr, &nextpc, 1);
420: }
421: printf("%d CPU addresses listed.\n", show);
422:
423: Disasm_SetColumns(oldcols);
424: }
425:
426: /**
427: * compare function for qsort() to sort CPU profile data by descending
428: * address access counts.
429: */
430: static int cmp_cpu_count(const void *p1, const void *p2)
431: {
432: Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].count;
433: Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].count;
434: if (count1 > count2) {
435: return -1;
436: }
437: if (count1 < count2) {
438: return 1;
439: }
440: return 0;
441: }
442:
443: /**
444: * Sort CPU profile data addresses by call counts and show the results.
445: * If symbols are requested and symbols are loaded, show (only) addresses
446: * matching a symbol.
447: */
448: void Profile_CpuShowCounts(int show, bool only_symbols)
449: {
450: cpu_profile_item_t *data = cpu_profile.data;
451: int symbols, matched, active;
452: int oldcols[DISASM_COLUMNS];
453: Uint32 *sort_arr, *end, addr, nextpc;
454: const char *name;
455: float percentage;
456: Uint32 count;
457:
458: if (!data) {
459: fprintf(stderr, "ERROR: no CPU profiling data available!\n");
460: return;
461: }
462: active = cpu_profile.active;
463: show = (show < active ? show : active);
464:
465: sort_arr = cpu_profile.sort_arr;
466: qsort(sort_arr, active, sizeof(*sort_arr), cmp_cpu_count);
467:
468: if (!only_symbols) {
469: leave_instruction_column(oldcols);
470: printf("addr:\t\tcount:\n");
471: for (end = sort_arr + show; sort_arr < end; sort_arr++) {
472: addr = index2address(*sort_arr);
473: count = data[*sort_arr].count;
474: percentage = 100.0*count/cpu_profile.all.count;
475: printf("0x%06x\t%5.2f%%\t%d%s\t",
476: addr, percentage, count,
477: count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
478: Disasm(stdout, addr, &nextpc, 1);
479: }
480: printf("%d CPU addresses listed.\n", show);
481: Disasm_SetColumns(oldcols);
482: return;
483: }
484:
485: symbols = Symbols_CpuCount();
486: if (!symbols) {
487: fprintf(stderr, "ERROR: no CPU symbols loaded!\n");
488: return;
489: }
490: matched = 0;
491:
492: leave_instruction_column(oldcols);
493:
494: printf("addr:\t\tcount:\t\tsymbol:\n");
495: for (end = sort_arr + active; sort_arr < end; sort_arr++) {
496:
497: addr = index2address(*sort_arr);
498: name = Symbols_GetByCpuAddress(addr);
499: if (!name) {
500: continue;
501: }
502: count = data[*sort_arr].count;
503: percentage = 100.0*count/cpu_profile.all.count;
504: printf("0x%06x\t%5.2f%%\t%d\t%s%s\t",
505: addr, percentage, count, name,
506: count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
507: Disasm(stdout, addr, &nextpc, 1);
508:
509: matched++;
510: if (matched >= show || matched >= symbols) {
511: break;
512: }
513: }
514: printf("%d CPU symbols listed.\n", matched);
515:
516: Disasm_SetColumns(oldcols);
517: }
518:
519:
520: static const char * addr2name(Uint32 addr, Uint64 *total)
521: {
522: Uint32 idx = address2index(addr);
523: *total = cpu_profile.data[idx].count;
524: return Symbols_GetByCpuAddress(addr);
525: }
526:
527: /**
528: * Output CPU callers info to given file.
529: */
530: void Profile_CpuShowCallers(FILE *fp)
531: {
532: Profile_ShowCallers(fp, cpu_callinfo.sites, cpu_callinfo.site, addr2name);
533: }
534:
535: /**
536: * Save CPU profile information to given file.
537: */
538: void Profile_CpuSave(FILE *out)
539: {
540: Uint32 text;
541: fputs("Field names:\tExecuted instructions, Used cycles, Instruction cache misses\n", out);
542: /* (Python) pegexp that matches address and all describled fields from disassembly:
543: * $<hex> : <ASM> <percentage>% (<count>, <cycles>, <misses>)
544: * $e5af38 : rts 0.00% (12, 0, 12)
545: */
546: fputs("Field regexp:\t^\\$([0-9a-f]+) :.*% \\((.*)\\)$\n", out);
547: /* some information for interpreting the addresses */
548: fprintf(out, "ROM_TOS:\t0x%06x-0x%06x\n", TosAddress, TosAddress + TosSize);
549: text = DebugInfo_GetTEXT();
550: if (text < TosAddress) {
551: fprintf(out, "PROGRAM_TEXT:\t0x%06x-0x%06x\n", text, DebugInfo_GetTEXTEnd());
552: }
553: fprintf(out, "CARTRIDGE:\t0xfa0000-0xfc0000\n");
554: Profile_CpuShowAddresses(0, 0xFC0000-2, out);
555: Profile_CpuShowCallers(out);
556: }
557:
558: /* ------------------ CPU profile control ----------------- */
559:
560: /**
561: * Initialize CPU profiling when necessary. Return true if profiling.
562: */
563: bool Profile_CpuStart(void)
564: {
565: int size;
566:
567: Profile_FreeCallinfo(&(cpu_callinfo));
568: if (cpu_profile.sort_arr) {
569: /* remove previous results */
570: free(cpu_profile.sort_arr);
571: free(cpu_profile.data);
572: cpu_profile.sort_arr = NULL;
573: cpu_profile.data = NULL;
574: printf("Freed previous CPU profile buffers.\n");
575: }
576: if (!cpu_profile.enabled) {
577: return false;
578: }
579: /* zero everything */
580: memset(&cpu_profile, 0, sizeof(cpu_profile));
581:
582: /* Shouldn't change within same debug session */
583: size = (STRamEnd + 0x20000 + TosSize) / 2;
584:
585: /* Add one entry for catching invalid PC values */
586: cpu_profile.data = calloc(size + 1, sizeof(*cpu_profile.data));
587: if (!cpu_profile.data) {
588: perror("ERROR, new CPU profile buffer alloc failed");
589: return false;
590: }
591: printf("Allocated CPU profile buffer (%d MB).\n",
592: (int)sizeof(*cpu_profile.data)*size/(1024*1024));
593: cpu_profile.size = size;
594:
595: Profile_AllocCallinfo(&(cpu_callinfo), Symbols_CpuCount(), "CPU");
596:
597: /* special hack for EmuTOS */
598: etos_switcher = PC_UNDEFINED;
599: if (cpu_callinfo.sites && bIsEmuTOS &&
600: (!Symbols_GetCpuAddress(SYMTYPE_TEXT, "_switchto", &etos_switcher) || etos_switcher < TosAddress)) {
601: etos_switcher = PC_UNDEFINED;
602: }
603:
604: cpu_profile.prev_cycles = Cycles_GetCounter(CYCLES_COUNTER_CPU);
605: cpu_profile.prev_family = OpcodeFamily;
606: cpu_profile.prev_pc = M68000_GetPC() & 0xffffff;
607:
608: cpu_profile.disasm_addr = 0;
609: cpu_profile.processed = false;
610: cpu_profile.enabled = true;
611: return cpu_profile.enabled;
612: }
613:
/**
 * Return true if "pc" could be the address of the instruction
 * directly following the one at "prev_pc", i.e. if it's after
 * "prev_pc" and at most 10 bytes away from it.
 */
static bool is_prev_instr(Uint32 prev_pc, Uint32 pc)
{
	return prev_pc < pc && (pc - prev_pc) <= 10;
}
625:
626: /**
627: * return caller instruction type classification
628: */
629: static calltype_t cpu_opcode_type(int family, Uint32 prev_pc, Uint32 pc)
630: {
631: switch (family) {
632:
633: case i_JSR:
634: case i_BSR:
635: return CALL_SUBROUTINE;
636:
637: case i_RTS:
638: case i_RTR:
639: case i_RTD:
640: return CALL_SUBRETURN;
641:
642: case i_JMP: /* often used also for "inlined" function calls... */
643: case i_Bcc: /* both BRA & BCC */
644: case i_FBcc:
645: case i_DBcc:
646: case i_FDBcc:
647: return CALL_BRANCH;
648:
649: case i_TRAP:
650: case i_TRAPV:
651: case i_TRAPcc:
652: case i_FTRAPcc:
653: case i_STOP:
654: case i_ILLG:
655: case i_CHK:
656: case i_CHK2:
657: case i_BKPT:
658: return CALL_EXCEPTION;
659:
660: case i_RTE:
661: return CALL_EXCRETURN;
662: }
663: /* just moved to next instruction? */
664: if (is_prev_instr(prev_pc, pc)) {
665: return CALL_NEXT;
666: }
667: return CALL_UNKNOWN;
668: }
669:
670: /**
671: * If call tracking is enabled (there are symbols), collect
672: * information about subroutine and other calls, and their costs.
673: *
674: * Like with profile data, caller info checks need to be for previous
675: * instruction, that's why "pc" argument for this function actually
676: * needs to be previous PC.
677: */
678: static void collect_calls(Uint32 pc, counters_t *counters)
679: {
680: calltype_t flag;
681: int idx, family;
682: Uint32 prev_pc, caller_pc;
683:
684: family = cpu_profile.prev_family;
685: cpu_profile.prev_family = OpcodeFamily;
686:
687: prev_pc = cpu_callinfo.prev_pc;
688: cpu_callinfo.prev_pc = pc;
689: caller_pc = PC_UNDEFINED;
690:
691: /* address is return address for last subroutine call? */
692: if (unlikely(pc == cpu_callinfo.return_pc) && likely(cpu_callinfo.depth)) {
693:
694: flag = cpu_opcode_type(family, prev_pc, pc);
695: /* previous address can be exception return (RTE) if exception
696: * occurred right after returning from subroutine call (RTS)
697: */
698: if (likely(flag == CALL_SUBRETURN || flag == CALL_EXCRETURN)) {
699: caller_pc = Profile_CallEnd(&cpu_callinfo, counters);
700: } else {
701: #if DEBUG
702: /* although at return address, it didn't return yet,
703: * e.g. because there was a jsr or jump to return address
704: */
705: Uint32 nextpc;
706: fprintf(stderr, "WARNING: subroutine call returned 0x%x -> 0x%x, not though RTS!\n", prev_pc, pc);
707: Disasm(stderr, prev_pc, &nextpc, 1);
708: #endif
709: }
710: /* next address might be another function, so need to fall through */
711: }
712:
713: /* address is one which we're tracking? */
714: idx = Symbols_GetCpuAddressIndex(pc);
715: if (unlikely(idx >= 0)) {
716:
717: flag = cpu_opcode_type(family, prev_pc, pc);
718: if (flag == CALL_SUBROUTINE) {
719: /* special HACK for for EmuTOS AES switcher which
720: * changes stack content to remove itself from call
721: * stack and uses RTS for subroutine *calls*, not
722: * for returning from them.
723: *
724: * It wouldn't be reliable to detect calls from it,
725: * so I'm making call *to* it show up as branch, to
726: * keep callstack depth correct.
727: */
728: if (unlikely(pc == etos_switcher)) {
729: flag = CALL_BRANCH;
730: } else if (unlikely(prev_pc == PC_UNDEFINED)) {
731: /* if first profiled instruction
732: * is subroutine call, it doesn't have
733: * valid prev_pc value stored
734: */
735: cpu_callinfo.return_pc = PC_UNDEFINED;
736: fprintf(stderr, "WARNING: previous PC from callinfo for 0x%d is undefined!\n", pc);
737: #if DEBUG
738: skip_assert = true;
739: DebugUI(REASON_CPU_EXCEPTION);
740: #endif
741: } else {
742: /* slow! */
743: cpu_callinfo.return_pc = Disasm_GetNextPC(prev_pc);
744: }
745: } else if (caller_pc != PC_UNDEFINED) {
746: /* returned from function to first instrction of another symbol:
747: * 0xf384 jsr some_function
748: * other_symbol:
749: * 0f3x8a some_instruction
750: * -> change return instruction address to
751: * address of what did the returned call.
752: */
753: prev_pc = caller_pc;
754: assert(is_prev_instr(prev_pc, pc));
755: flag = CALL_NEXT;
756: }
757: Profile_CallStart(idx, &cpu_callinfo, prev_pc, flag, pc, counters);
758: }
759: }
760:
761: /**
762: * Update CPU cycle and count statistics for PC address.
763: *
764: * This gets called after instruction has executed and PC
765: * has advanced to next instruction.
766: */
767: void Profile_CpuUpdate(void)
768: {
769: counters_t *counters = &(cpu_profile.all);
770: Uint32 pc, prev_pc, idx, cycles, misses;
771: cpu_profile_item_t *prev;
772:
773: prev_pc = cpu_profile.prev_pc;
774: /* PC may have extra bits, they need to be masked away as
775: * emulation itself does that too when PC value is used
776: */
777: cpu_profile.prev_pc = pc = M68000_GetPC() & 0xffffff;
778:
779: idx = address2index(prev_pc);
780: assert(idx <= cpu_profile.size);
781: prev = cpu_profile.data + idx;
782:
783: if (likely(prev->count < MAX_CPU_PROFILE_VALUE)) {
784: prev->count++;
785: }
786:
787: #if USE_CYCLES_COUNTER
788: /* Confusingly, with DSP enabled, cycle counter is for this instruction,
789: * without DSP enabled, it's a monotonically increasing counter.
790: */
791: if (bDspEnabled) {
792: cycles = Cycles_GetCounter(CYCLES_COUNTER_CPU);
793: } else {
794: Uint32 newcycles = Cycles_GetCounter(CYCLES_COUNTER_CPU);
795: cycles = newcycles - cpu_profile.prev_cycles;
796: cpu_profile.prev_cycles = newcycles;
797: }
798: #else
799: cycles = CurrentInstrCycles + nWaitStateCycles;
800: #endif
801: /* cycles are based on 8Mhz clock, change them to correct one */
802: cycles <<= nCpuFreqShift;
803:
804: if (likely(prev->cycles < MAX_CPU_PROFILE_VALUE - cycles)) {
805: prev->cycles += cycles;
806: } else {
807: prev->cycles = MAX_CPU_PROFILE_VALUE;
808: }
809:
810: #if ENABLE_WINUAE_CPU
811: misses = CpuInstruction.iCacheMisses;
812: assert(misses < MAX_MISS);
813: cpu_profile.miss_counts[misses]++;
814: if (likely(prev->misses < MAX_CPU_PROFILE_VALUE - misses)) {
815: prev->misses += misses;
816: } else {
817: prev->misses = MAX_CPU_PROFILE_VALUE;
818: }
819: #else
820: misses = 0;
821: #endif
822: if (cpu_callinfo.sites) {
823: collect_calls(prev_pc, counters);
824: }
825: /* counters are increased after caller info is processed,
826: * otherwise cost for the instruction calling the callee
827: * doesn't get accounted to caller (but callee).
828: */
829: counters->misses += misses;
830: counters->cycles += cycles;
831: counters->count++;
832:
833: #if DEBUG
834: if (unlikely(OpcodeFamily == 0)) {
835: Uint32 nextpc;
836: fputs("WARNING: instruction opcode family is zero (=i_ILLG) for instruction:\n", stderr);
837: Disasm(stderr, prev_pc, &nextpc, 1);
838: }
839: /* catch too large (and negative) cycles for other than STOP instruction */
840: if (unlikely(cycles > 512 && OpcodeFamily != i_STOP)) {
841: Uint32 nextpc;
842: fprintf(stderr, "WARNING: cycles %d > 512:\n", cycles);
843: Disasm(stderr, prev_pc, &nextpc, 1);
844: }
845: if (unlikely(cycles == 0)) {
846: Uint32 nextpc;
847: fputs("WARNING: Zero cycles for an opcode:\n", stderr);
848: Disasm(stderr, prev_pc, &nextpc, 1);
849: }
850: #endif
851: }
852:
853:
854: /**
855: * Helper for accounting CPU profile area item.
856: */
857: static void update_area_item(profile_area_t *area, Uint32 addr, cpu_profile_item_t *item)
858: {
859: Uint32 cycles = item->cycles;
860: Uint32 count = item->count;
861:
862: if (!count) {
863: return;
864: }
865: area->counters.count += count;
866: area->counters.misses += item->misses;
867: area->counters.cycles += cycles;
868:
869: if (cycles == MAX_CPU_PROFILE_VALUE) {
870: area->overflow = true;
871: }
872: if (addr < area->lowest) {
873: area->lowest = addr;
874: }
875: area->highest = addr;
876:
877: area->active++;
878: }
879:
880: /**
881: * Helper for collecting CPU profile area statistics.
882: */
883: static Uint32 update_area(profile_area_t *area, Uint32 start, Uint32 end)
884: {
885: cpu_profile_item_t *item;
886: Uint32 addr;
887:
888: memset(area, 0, sizeof(profile_area_t));
889: area->lowest = cpu_profile.size;
890:
891: item = &(cpu_profile.data[start]);
892: for (addr = start; addr < end; addr++, item++) {
893: update_area_item(area, addr, item);
894: }
895: return addr;
896: }
897:
898: /**
899: * Helper for initializing CPU profile area sorting indexes.
900: */
901: static Uint32* index_area(profile_area_t *area, Uint32 *sort_arr)
902: {
903: cpu_profile_item_t *item;
904: Uint32 addr;
905:
906: item = &(cpu_profile.data[area->lowest]);
907: for (addr = area->lowest; addr <= area->highest; addr++, item++) {
908: if (item->count) {
909: *sort_arr++ = addr;
910: }
911: }
912: return sort_arr;
913: }
914:
915: /**
916: * Stop and process the CPU profiling data; collect stats and
917: * prepare for more optimal sorting.
918: */
919: void Profile_CpuStop(void)
920: {
921: Uint32 *sort_arr, next;
922: int active;
923:
924: if (cpu_profile.processed || !cpu_profile.enabled) {
925: return;
926: }
927: /* user didn't change RAM or TOS size in the meanwhile? */
928: assert(cpu_profile.size == (STRamEnd + 0x20000 + TosSize) / 2);
929:
930: Profile_FinalizeCalls(&(cpu_callinfo), &(cpu_profile.all), Symbols_GetByCpuAddress);
931:
932: /* find lowest and highest addresses executed etc */
933: next = update_area(&cpu_profile.ram, 0, STRamEnd/2);
934: next = update_area(&cpu_profile.tos, next, (STRamEnd + TosSize)/2);
935: next = update_area(&cpu_profile.rom, next, cpu_profile.size);
936: assert(next == cpu_profile.size);
937:
938: #if DEBUG
939: if (skip_assert) {
940: skip_assert = false;
941: } else
942: #endif
943: {
944: assert(cpu_profile.all.misses == cpu_profile.ram.counters.misses + cpu_profile.tos.counters.misses + cpu_profile.rom.counters.misses);
945: assert(cpu_profile.all.cycles == cpu_profile.ram.counters.cycles + cpu_profile.tos.counters.cycles + cpu_profile.rom.counters.cycles);
946: assert(cpu_profile.all.count == cpu_profile.ram.counters.count + cpu_profile.tos.counters.count + cpu_profile.rom.counters.count);
947: }
948:
949: /* allocate address array for sorting */
950: active = cpu_profile.ram.active + cpu_profile.rom.active + cpu_profile.tos.active;
951: sort_arr = calloc(active, sizeof(*sort_arr));
952:
953: if (!sort_arr) {
954: perror("ERROR: allocating CPU profile address data");
955: free(cpu_profile.data);
956: cpu_profile.data = NULL;
957: return;
958: }
959: printf("Allocated CPU profile address buffer (%d KB).\n",
960: (int)sizeof(*sort_arr)*(active+512)/1024);
961: cpu_profile.sort_arr = sort_arr;
962: cpu_profile.active = active;
963:
964: /* and fill addresses for used instructions... */
965: sort_arr = index_area(&cpu_profile.ram, sort_arr);
966: sort_arr = index_area(&cpu_profile.tos, sort_arr);
967: sort_arr = index_area(&cpu_profile.rom, sort_arr);
968: assert(sort_arr == cpu_profile.sort_arr + cpu_profile.active);
969: //printf("%d/%d/%d\n", area->active, sort_arr-cpu_profile.sort_arr, active);
970:
971: Profile_CpuShowStats();
972: cpu_profile.processed = true;
973: }
974:
975: /**
976: * Get pointers to CPU profile enabling and disasm address variables
977: * for updating them (in parser).
978: */
979: void Profile_CpuGetPointers(bool **enabled, Uint32 **disasm_addr)
980: {
981: *disasm_addr = &cpu_profile.disasm_addr;
982: *enabled = &cpu_profile.enabled;
983: }
984:
985: /**
986: * Get callinfo & symbol search pointers for stack walking.
987: */
988: void Profile_CpuGetCallinfo(callinfo_t **callinfo, const char* (**get_symbol)(Uint32))
989: {
990: *callinfo = &(cpu_callinfo);
991: *get_symbol = Symbols_GetByCpuAddress;
992: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.