|
|
1.1 root 1: /*
2: * Hatari - profilecpu.c
3: *
4: * Copyright (C) 2010-2013 by Eero Tamminen
5: *
6: * This file is distributed under the GNU General Public License, version 2
7: * or at your option any later version. Read the file gpl.txt for details.
8: *
9: * profilecpu.c - functions for profiling CPU and showing the results.
10: */
11: const char Profilecpu_fileid[] = "Hatari profilecpu.c : " __DATE__ " " __TIME__;
12:
13: #include <stdio.h>
14: #include <inttypes.h>
15: #include <assert.h>
16: #include "main.h"
17: #include "configuration.h"
18: #include "clocks_timings.h"
19: #include "debugInfo.h"
20: #include "dsp.h"
21: #include "m68000.h"
22: #include "68kDisass.h"
23: #include "profile.h"
24: #include "profile_priv.h"
25: #include "stMemory.h"
26: #include "symbols.h"
27: #include "tos.h"
1.1.1.2 ! root 28: #include "screen.h"
! 29: #include "video.h"
! 30:
! 31:
! 32: /* cartridge area */
! 33: #define CART_START 0xFA0000
! 34: #define CART_END 0xFC0000
! 35: #define CART_SIZE (CART_END - CART_START)
! 36:
1.1 root 37:
38: /* if non-zero, output (more) warnings on suspicious:
39: * - cycle/instruction counts
40: * - PC switches
41: * And drop to debugger on invalid PC addresses.
42: */
43: #define DEBUG 0
44: #if DEBUG
45: #include "debugui.h"
46: static bool skip_assert;
47: #endif
48:
49: static callinfo_t cpu_callinfo;
50:
51: /* This is relevant with WinUAE CPU core:
52: * - the default cycle exact variant needs this define to be non-zero
53: * - non-cycle exact and MMU variants need this define to be 0
54: * for cycle counts to make any sense
55: */
56: #define USE_CYCLES_COUNTER 1
57:
58: #define MAX_CPU_PROFILE_VALUE 0xFFFFFFFF
59:
60: typedef struct {
61: Uint32 count; /* how many times this address instrcution is executed */
62: Uint32 cycles; /* how many CPU cycles was taken at this address */
63: Uint32 misses; /* how many CPU cache misses happened at this address */
64: } cpu_profile_item_t;
65:
66: #define MAX_MISS 4
67:
68: static struct {
69: counters_t all; /* total counts for all areas */
70: Uint32 miss_counts[MAX_MISS]; /* cache miss counts */
71: cpu_profile_item_t *data; /* profile data items */
72: Uint32 size; /* number of allocated profile data items */
73: profile_area_t ram; /* normal RAM stats */
74: profile_area_t rom; /* cartridge ROM stats */
75: profile_area_t tos; /* ROM TOS stats */
76: int active; /* number of active data items in all areas */
77: Uint32 *sort_arr; /* data indexes used for sorting */
1.1.1.2 ! root 78: int prev_family; /* previous instruction opcode family */
1.1 root 79: Uint32 prev_cycles; /* previous instruction cycles counter */
80: Uint32 prev_pc; /* previous instruction address */
1.1.1.2 ! root 81: Uint32 loop_start; /* address of last loop start */
! 82: Uint32 loop_end; /* address of last loop end */
! 83: Uint32 loop_count; /* how many times it was looped */
1.1 root 84: Uint32 disasm_addr; /* 'addresses' command start address */
85: bool processed; /* true when data is already processed */
86: bool enabled; /* true when profiling enabled */
87: } cpu_profile;
88:
89: /* special hack for EmuTOS */
90: static Uint32 etos_switcher;
91:
92:
93: /* ------------------ CPU profile address mapping ----------------- */
94:
95: /**
96: * convert Atari memory address to sorting array profile data index.
97: */
98: static inline Uint32 address2index(Uint32 pc)
99: {
100: if (unlikely(pc & 1)) {
101: fprintf(stderr, "WARNING: odd CPU profile instruction address 0x%x!\n", pc);
102: #if DEBUG
103: skip_assert = true;
104: DebugUI(REASON_CPU_EXCEPTION);
105: #endif
106: }
1.1.1.2 ! root 107: if (pc < STRamEnd) {
! 108: /* most likely case, use RAM address as-is */
! 109:
! 110: } else if (pc >= TosAddress && pc < TosAddress + TosSize) {
1.1 root 111: /* TOS, put it after RAM data */
112: pc = pc - TosAddress + STRamEnd;
1.1.1.2 ! root 113: if (TosAddress >= CART_END) {
! 114: /* and after cartridge data as it's higher */
! 115: pc += CART_SIZE;
! 116: }
! 117: } else if (pc >= CART_START && pc < CART_END) {
! 118: /* ROM, put it after RAM data */
! 119: pc = pc - CART_START + STRamEnd;
! 120: if (TosAddress < CART_START) {
! 121: /* and after TOS as it's higher */
! 122: pc += TosSize;
! 123: }
1.1 root 124: } else {
1.1.1.2 ! root 125: fprintf(stderr, "WARNING: 'invalid' CPU PC profile instruction address 0x%x!\n", pc);
! 126: /* extra entry at end is reserved for invalid PC values */
! 127: pc = STRamEnd + TosSize + 0x20000;
1.1 root 128: #if DEBUG
1.1.1.2 ! root 129: skip_assert = true;
! 130: DebugUI(REASON_CPU_EXCEPTION);
1.1 root 131: #endif
132: }
133: /* CPU instructions are at even addresses, save space by halving */
134: return (pc >> 1);
135: }
136:
137: /**
138: * convert sorting array profile data index to Atari memory address.
139: */
140: static Uint32 index2address(Uint32 idx)
141: {
142: idx <<= 1;
143: /* RAM */
144: if (idx < STRamEnd) {
145: return idx;
146: }
147: idx -= STRamEnd;
1.1.1.2 ! root 148: /* TOS before cartridge area? */
! 149: if (TosAddress < CART_START) {
! 150: /* TOS */
! 151: if (idx < TosSize) {
! 152: return idx + TosAddress;
! 153: }
! 154: idx -= TosSize;
! 155: /* ROM */
! 156: return idx + CART_START;
! 157: } else {
! 158: /* ROM */
! 159: if (idx < CART_SIZE) {
! 160: return idx + CART_START;
! 161: }
! 162: idx -= CART_SIZE;
! 163: /* TOS */
1.1 root 164: return idx + TosAddress;
165: }
166: }
167:
168: /* ------------------ CPU profile results ----------------- */
169:
170: /**
171: * Get CPU cycles, count and count percentage for given address.
172: * Return true if data was available and non-zero, false otherwise.
173: */
174: bool Profile_CpuAddressData(Uint32 addr, float *percentage, Uint32 *count, Uint32 *cycles, Uint32 *misses)
175: {
176: Uint32 idx;
177: if (!cpu_profile.data) {
178: return false;
179: }
180: idx = address2index(addr);
181: *misses = cpu_profile.data[idx].misses;
182: *cycles = cpu_profile.data[idx].cycles;
183: *count = cpu_profile.data[idx].count;
184: if (cpu_profile.all.count) {
185: *percentage = 100.0*(*count)/cpu_profile.all.count;
186: } else {
187: *percentage = 0.0;
188: }
189: return (*count > 0);
190: }
191:
192: /**
193: * Helper to show statistics for specified CPU profile area.
194: */
195: static void show_cpu_area_stats(profile_area_t *area)
196: {
197: if (!area->active) {
198: fprintf(stderr, "- no activity\n");
199: return;
200: }
201: fprintf(stderr, "- active address range:\n 0x%06x-0x%06x\n",
202: index2address(area->lowest),
203: index2address(area->highest));
204: fprintf(stderr, "- active instruction addresses:\n %d (%.2f%% of all)\n",
205: area->active,
206: 100.0 * area->active / cpu_profile.active);
207: fprintf(stderr, "- executed instructions:\n %"PRIu64" (%.2f%% of all)\n",
208: area->counters.count,
209: 100.0 * area->counters.count / cpu_profile.all.count);
210: #if ENABLE_WINUAE_CPU
211: if (cpu_profile.all.misses) { /* CPU cache in use? */
212: fprintf(stderr, "- instruction cache misses:\n %"PRIu64" (%.2f%% of all)\n",
213: area->counters.misses,
214: 100.0 * area->counters.misses / cpu_profile.all.misses);
215: }
216: #endif
217: fprintf(stderr, "- used cycles:\n %"PRIu64" (%.2f%% of all)\n = %.5fs\n",
218: area->counters.cycles,
219: 100.0 * area->counters.cycles / cpu_profile.all.cycles,
220: (double)area->counters.cycles / MachineClocks.CPU_Freq);
221: if (area->overflow) {
222: fprintf(stderr, " *** COUNTER OVERFLOW! ***\n");
223: }
224: }
225:
226:
227: /**
228: * show CPU area (RAM, ROM, TOS) specific statistics.
229: */
230: void Profile_CpuShowStats(void)
231: {
232: fprintf(stderr, "Normal RAM (0-0x%X):\n", STRamEnd);
233: show_cpu_area_stats(&cpu_profile.ram);
234:
235: fprintf(stderr, "ROM TOS (0x%X-0x%X):\n", TosAddress, TosAddress + TosSize);
236: show_cpu_area_stats(&cpu_profile.tos);
237:
1.1.1.2 ! root 238: fprintf(stderr, "Cartridge ROM (0x%X-%X):\n", CART_START, CART_END);
1.1 root 239: show_cpu_area_stats(&cpu_profile.rom);
240:
241: fprintf(stderr, "\n= %.5fs\n",
242: (double)cpu_profile.all.cycles / MachineClocks.CPU_Freq);
243:
244: #if ENABLE_WINUAE_CPU
245: if (cpu_profile.all.misses) { /* CPU cache in use? */
246: int i;
247: fprintf(stderr, "\nCache misses per instruction, number of occurrences:\n");
248: for (i = 0; i < MAX_MISS; i++) {
249: fprintf(stderr, "- %d: %d\n", i, cpu_profile.miss_counts[i]);
250: }
251: }
252: #endif
253: }
254:
255: /**
256: * Show CPU instructions which execution was profiled, in the address order,
257: * starting from the given address. Return next disassembly address.
258: */
259: Uint32 Profile_CpuShowAddresses(Uint32 lower, Uint32 upper, FILE *out)
260: {
261: int oldcols[DISASM_COLUMNS], newcols[DISASM_COLUMNS];
262: int show, shown, active;
263: const char *symbol;
264: cpu_profile_item_t *data;
265: Uint32 idx, end, size;
266: uaecptr nextpc, addr;
267:
268: data = cpu_profile.data;
269: if (!data) {
270: fprintf(stderr, "ERROR: no CPU profiling data available!\n");
271: return 0;
272: }
273:
274: size = cpu_profile.size;
275: active = cpu_profile.active;
276: if (upper) {
277: end = address2index(upper);
278: show = active;
279: if (end > size) {
280: end = size;
281: }
282: } else {
283: end = size;
284: show = ConfigureParams.Debugger.nDisasmLines;
285: if (!show || show > active) {
286: show = active;
287: }
288: }
289:
290: /* get/change columns */
291: Disasm_GetColumns(oldcols);
292: Disasm_DisableColumn(DISASM_COLUMN_HEXDUMP, oldcols, newcols);
293: Disasm_SetColumns(newcols);
294:
295: fputs("# disassembly with profile data: <instructions percentage>% (<sum of instructions>, <sum of cycles>, <sum of i-cache misses>)\n", out);
296:
297: nextpc = 0;
298: idx = address2index(lower);
299: for (shown = 0; shown < show && idx < end; idx++) {
300: if (!data[idx].count) {
301: continue;
302: }
303: addr = index2address(idx);
304: if (addr != nextpc && nextpc) {
305: fprintf(out, "[...]\n");
306: }
307: symbol = Symbols_GetByCpuAddress(addr);
308: if (symbol) {
309: fprintf(out, "%s:\n", symbol);
310: }
311: /* NOTE: column setup works only with 68kDisass disasm engine! */
312: Disasm(out, addr, &nextpc, 1);
313: shown++;
314: }
315: printf("Disassembled %d (of active %d) CPU addresses.\n", shown, active);
316:
317: /* restore disassembly columns */
318: Disasm_SetColumns(oldcols);
319: return nextpc;
320: }
321:
322: /**
323: * remove all disassembly columns except instruction ones.
324: * data needed to restore columns is stored to "oldcols"
325: */
326: static void leave_instruction_column(int *oldcols)
327: {
328: int i, newcols[DISASM_COLUMNS];
329:
330: Disasm_GetColumns(oldcols);
331: for (i = 0; i < DISASM_COLUMNS; i++) {
332: if (i == DISASM_COLUMN_OPCODE || i == DISASM_COLUMN_OPERAND) {
333: continue;
334: }
335: Disasm_DisableColumn(i, oldcols, newcols);
336: oldcols = newcols;
337: }
338: Disasm_SetColumns(newcols);
339: }
340:
341: #if ENABLE_WINUAE_CPU
342: /**
343: * compare function for qsort() to sort CPU profile data by instruction cache misses.
344: */
345: static int cmp_cpu_misses(const void *p1, const void *p2)
346: {
347: Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].misses;
348: Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].misses;
349: if (count1 > count2) {
350: return -1;
351: }
352: if (count1 < count2) {
353: return 1;
354: }
355: return 0;
356: }
357:
358: /**
359: * Sort CPU profile data addresses by instruction cache misses and show the results.
360: */
361: void Profile_CpuShowMisses(int show)
362: {
363: int active;
364: int oldcols[DISASM_COLUMNS];
365: Uint32 *sort_arr, *end, addr, nextpc;
366: cpu_profile_item_t *data = cpu_profile.data;
367: float percentage;
368: Uint32 count;
369:
370: if (!cpu_profile.all.misses) {
371: fprintf(stderr, "No CPU cache miss information available.\n");
372: return;
373: }
374:
375: active = cpu_profile.active;
376: sort_arr = cpu_profile.sort_arr;
377: qsort(sort_arr, active, sizeof(*sort_arr), cmp_cpu_misses);
378:
379: leave_instruction_column(oldcols);
380:
381: printf("addr:\t\tmisses:\n");
382: show = (show < active ? show : active);
383: for (end = sort_arr + show; sort_arr < end; sort_arr++) {
384: addr = index2address(*sort_arr);
385: count = data[*sort_arr].misses;
386: percentage = 100.0*count/cpu_profile.all.misses;
387: printf("0x%06x\t%5.2f%%\t%d%s\t", addr, percentage, count,
388: count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
389: Disasm(stdout, addr, &nextpc, 1);
390: }
391: printf("%d CPU addresses listed.\n", show);
392:
393: Disasm_SetColumns(oldcols);
394: }
395: #else
396: void Profile_CpuShowMisses(int show) {
397: fprintf(stderr, "Cache misses are recorded only with WinUAE CPU.\n");
398: }
399: #endif
400:
401:
402: /**
403: * compare function for qsort() to sort CPU profile data by cycles counts.
404: */
405: static int cmp_cpu_cycles(const void *p1, const void *p2)
406: {
407: Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].cycles;
408: Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].cycles;
409: if (count1 > count2) {
410: return -1;
411: }
412: if (count1 < count2) {
413: return 1;
414: }
415: return 0;
416: }
417:
418: /**
419: * Sort CPU profile data addresses by cycle counts and show the results.
420: */
421: void Profile_CpuShowCycles(int show)
422: {
423: int active;
424: int oldcols[DISASM_COLUMNS];
425: Uint32 *sort_arr, *end, addr, nextpc;
426: cpu_profile_item_t *data = cpu_profile.data;
427: float percentage;
428: Uint32 count;
429:
430: if (!data) {
431: fprintf(stderr, "ERROR: no CPU profiling data available!\n");
432: return;
433: }
434:
435: active = cpu_profile.active;
436: sort_arr = cpu_profile.sort_arr;
437: qsort(sort_arr, active, sizeof(*sort_arr), cmp_cpu_cycles);
438:
439: leave_instruction_column(oldcols);
440:
441: printf("addr:\t\tcycles:\n");
442: show = (show < active ? show : active);
443: for (end = sort_arr + show; sort_arr < end; sort_arr++) {
444: addr = index2address(*sort_arr);
445: count = data[*sort_arr].cycles;
446: percentage = 100.0*count/cpu_profile.all.cycles;
447: printf("0x%06x\t%5.2f%%\t%d%s\t", addr, percentage, count,
448: count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
449: Disasm(stdout, addr, &nextpc, 1);
450: }
451: printf("%d CPU addresses listed.\n", show);
452:
453: Disasm_SetColumns(oldcols);
454: }
455:
456: /**
457: * compare function for qsort() to sort CPU profile data by descending
458: * address access counts.
459: */
460: static int cmp_cpu_count(const void *p1, const void *p2)
461: {
462: Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].count;
463: Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].count;
464: if (count1 > count2) {
465: return -1;
466: }
467: if (count1 < count2) {
468: return 1;
469: }
470: return 0;
471: }
472:
473: /**
474: * Sort CPU profile data addresses by call counts and show the results.
475: * If symbols are requested and symbols are loaded, show (only) addresses
476: * matching a symbol.
477: */
478: void Profile_CpuShowCounts(int show, bool only_symbols)
479: {
480: cpu_profile_item_t *data = cpu_profile.data;
481: int symbols, matched, active;
482: int oldcols[DISASM_COLUMNS];
483: Uint32 *sort_arr, *end, addr, nextpc;
484: const char *name;
485: float percentage;
486: Uint32 count;
487:
488: if (!data) {
489: fprintf(stderr, "ERROR: no CPU profiling data available!\n");
490: return;
491: }
492: active = cpu_profile.active;
493: show = (show < active ? show : active);
494:
495: sort_arr = cpu_profile.sort_arr;
496: qsort(sort_arr, active, sizeof(*sort_arr), cmp_cpu_count);
497:
498: if (!only_symbols) {
499: leave_instruction_column(oldcols);
500: printf("addr:\t\tcount:\n");
501: for (end = sort_arr + show; sort_arr < end; sort_arr++) {
502: addr = index2address(*sort_arr);
503: count = data[*sort_arr].count;
504: percentage = 100.0*count/cpu_profile.all.count;
505: printf("0x%06x\t%5.2f%%\t%d%s\t",
506: addr, percentage, count,
507: count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
508: Disasm(stdout, addr, &nextpc, 1);
509: }
510: printf("%d CPU addresses listed.\n", show);
511: Disasm_SetColumns(oldcols);
512: return;
513: }
514:
515: symbols = Symbols_CpuCount();
516: if (!symbols) {
517: fprintf(stderr, "ERROR: no CPU symbols loaded!\n");
518: return;
519: }
520: matched = 0;
521:
522: leave_instruction_column(oldcols);
523:
524: printf("addr:\t\tcount:\t\tsymbol:\n");
525: for (end = sort_arr + active; sort_arr < end; sort_arr++) {
526:
527: addr = index2address(*sort_arr);
528: name = Symbols_GetByCpuAddress(addr);
529: if (!name) {
530: continue;
531: }
532: count = data[*sort_arr].count;
533: percentage = 100.0*count/cpu_profile.all.count;
534: printf("0x%06x\t%5.2f%%\t%d\t%s%s\t",
535: addr, percentage, count, name,
536: count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
537: Disasm(stdout, addr, &nextpc, 1);
538:
539: matched++;
540: if (matched >= show || matched >= symbols) {
541: break;
542: }
543: }
544: printf("%d CPU symbols listed.\n", matched);
545:
546: Disasm_SetColumns(oldcols);
547: }
548:
549:
550: static const char * addr2name(Uint32 addr, Uint64 *total)
551: {
552: Uint32 idx = address2index(addr);
553: *total = cpu_profile.data[idx].count;
554: return Symbols_GetByCpuAddress(addr);
555: }
556:
557: /**
558: * Output CPU callers info to given file.
559: */
560: void Profile_CpuShowCallers(FILE *fp)
561: {
562: Profile_ShowCallers(fp, cpu_callinfo.sites, cpu_callinfo.site, addr2name);
563: }
564:
565: /**
566: * Save CPU profile information to given file.
567: */
568: void Profile_CpuSave(FILE *out)
569: {
570: Uint32 text;
571: fputs("Field names:\tExecuted instructions, Used cycles, Instruction cache misses\n", out);
572: /* (Python) pegexp that matches address and all describled fields from disassembly:
573: * $<hex> : <ASM> <percentage>% (<count>, <cycles>, <misses>)
574: * $e5af38 : rts 0.00% (12, 0, 12)
575: */
576: fputs("Field regexp:\t^\\$([0-9a-f]+) :.*% \\((.*)\\)$\n", out);
577: /* some information for interpreting the addresses */
578: fprintf(out, "ROM_TOS:\t0x%06x-0x%06x\n", TosAddress, TosAddress + TosSize);
579: text = DebugInfo_GetTEXT();
580: if (text < TosAddress) {
581: fprintf(out, "PROGRAM_TEXT:\t0x%06x-0x%06x\n", text, DebugInfo_GetTEXTEnd());
582: }
1.1.1.2 ! root 583: fprintf(out, "CARTRIDGE:\t0x%06x-0x%06x\n", CART_START, CART_END);
! 584: Profile_CpuShowAddresses(0, CART_END-2, out);
1.1 root 585: Profile_CpuShowCallers(out);
586: }
587:
588: /* ------------------ CPU profile control ----------------- */
589:
590: /**
591: * Initialize CPU profiling when necessary. Return true if profiling.
592: */
593: bool Profile_CpuStart(void)
594: {
595: int size;
596:
597: Profile_FreeCallinfo(&(cpu_callinfo));
598: if (cpu_profile.sort_arr) {
599: /* remove previous results */
600: free(cpu_profile.sort_arr);
601: free(cpu_profile.data);
602: cpu_profile.sort_arr = NULL;
603: cpu_profile.data = NULL;
604: printf("Freed previous CPU profile buffers.\n");
605: }
606: if (!cpu_profile.enabled) {
607: return false;
608: }
609: /* zero everything */
610: memset(&cpu_profile, 0, sizeof(cpu_profile));
611:
612: /* Shouldn't change within same debug session */
613: size = (STRamEnd + 0x20000 + TosSize) / 2;
614:
615: /* Add one entry for catching invalid PC values */
616: cpu_profile.data = calloc(size + 1, sizeof(*cpu_profile.data));
617: if (!cpu_profile.data) {
618: perror("ERROR, new CPU profile buffer alloc failed");
619: return false;
620: }
621: printf("Allocated CPU profile buffer (%d MB).\n",
622: (int)sizeof(*cpu_profile.data)*size/(1024*1024));
623: cpu_profile.size = size;
624:
625: Profile_AllocCallinfo(&(cpu_callinfo), Symbols_CpuCount(), "CPU");
626:
627: /* special hack for EmuTOS */
628: etos_switcher = PC_UNDEFINED;
629: if (cpu_callinfo.sites && bIsEmuTOS &&
630: (!Symbols_GetCpuAddress(SYMTYPE_TEXT, "_switchto", &etos_switcher) || etos_switcher < TosAddress)) {
631: etos_switcher = PC_UNDEFINED;
632: }
633:
634: cpu_profile.prev_cycles = Cycles_GetCounter(CYCLES_COUNTER_CPU);
635: cpu_profile.prev_family = OpcodeFamily;
636: cpu_profile.prev_pc = M68000_GetPC() & 0xffffff;
637:
1.1.1.2 ! root 638: cpu_profile.loop_start = PC_UNDEFINED;
! 639: cpu_profile.loop_end = PC_UNDEFINED;
! 640: cpu_profile.loop_count = 0;
! 641: Profile_LoopReset();
! 642:
1.1 root 643: cpu_profile.disasm_addr = 0;
644: cpu_profile.processed = false;
645: cpu_profile.enabled = true;
646: return cpu_profile.enabled;
647: }
648:
649: /**
650: * return true if pc could be next instruction for previous pc
651: */
652: static bool is_prev_instr(Uint32 prev_pc, Uint32 pc)
653: {
654: /* just moved to next instruction (1-2 words)? */
655: if (prev_pc < pc && (pc - prev_pc) <= 10) {
656: return true;
657: }
658: return false;
659: }
660:
661: /**
662: * return caller instruction type classification
663: */
664: static calltype_t cpu_opcode_type(int family, Uint32 prev_pc, Uint32 pc)
665: {
666: switch (family) {
667:
668: case i_JSR:
669: case i_BSR:
670: return CALL_SUBROUTINE;
671:
672: case i_RTS:
673: case i_RTR:
674: case i_RTD:
675: return CALL_SUBRETURN;
676:
677: case i_JMP: /* often used also for "inlined" function calls... */
678: case i_Bcc: /* both BRA & BCC */
679: case i_FBcc:
680: case i_DBcc:
681: case i_FDBcc:
682: return CALL_BRANCH;
683:
684: case i_TRAP:
685: case i_TRAPV:
686: case i_TRAPcc:
687: case i_FTRAPcc:
688: case i_STOP:
689: case i_ILLG:
690: case i_CHK:
691: case i_CHK2:
692: case i_BKPT:
693: return CALL_EXCEPTION;
694:
695: case i_RTE:
696: return CALL_EXCRETURN;
697: }
698: /* just moved to next instruction? */
699: if (is_prev_instr(prev_pc, pc)) {
700: return CALL_NEXT;
701: }
702: return CALL_UNKNOWN;
703: }
704:
705: /**
706: * If call tracking is enabled (there are symbols), collect
707: * information about subroutine and other calls, and their costs.
708: *
709: * Like with profile data, caller info checks need to be for previous
710: * instruction, that's why "pc" argument for this function actually
711: * needs to be previous PC.
712: */
713: static void collect_calls(Uint32 pc, counters_t *counters)
714: {
715: calltype_t flag;
716: int idx, family;
717: Uint32 prev_pc, caller_pc;
718:
719: family = cpu_profile.prev_family;
720: cpu_profile.prev_family = OpcodeFamily;
721:
722: prev_pc = cpu_callinfo.prev_pc;
723: cpu_callinfo.prev_pc = pc;
724: caller_pc = PC_UNDEFINED;
725:
726: /* address is return address for last subroutine call? */
727: if (unlikely(pc == cpu_callinfo.return_pc) && likely(cpu_callinfo.depth)) {
728:
729: flag = cpu_opcode_type(family, prev_pc, pc);
1.1.1.2 ! root 730: /* previous address can be exception return (e.g. RTE) instead of RTS,
! 731: * if exception occurred right after returning from subroutine call.
1.1 root 732: */
733: if (likely(flag == CALL_SUBRETURN || flag == CALL_EXCRETURN)) {
734: caller_pc = Profile_CallEnd(&cpu_callinfo, counters);
735: } else {
736: #if DEBUG
737: /* although at return address, it didn't return yet,
738: * e.g. because there was a jsr or jump to return address
739: */
740: Uint32 nextpc;
1.1.1.2 ! root 741: fprintf(stderr, "WARNING: subroutine call returned 0x%x -> 0x%x, not through RTS!\n", prev_pc, pc);
1.1 root 742: Disasm(stderr, prev_pc, &nextpc, 1);
743: #endif
744: }
1.1.1.2 ! root 745: /* next address might be another symbol, so need to fall through */
1.1 root 746: }
747:
748: /* address is one which we're tracking? */
749: idx = Symbols_GetCpuAddressIndex(pc);
750: if (unlikely(idx >= 0)) {
751:
752: flag = cpu_opcode_type(family, prev_pc, pc);
1.1.1.2 ! root 753: if (flag == CALL_SUBROUTINE || flag == CALL_EXCEPTION) {
1.1 root 754: /* special HACK for for EmuTOS AES switcher which
755: * changes stack content to remove itself from call
756: * stack and uses RTS for subroutine *calls*, not
757: * for returning from them.
758: *
759: * It wouldn't be reliable to detect calls from it,
760: * so I'm making call *to* it show up as branch, to
761: * keep callstack depth correct.
762: */
763: if (unlikely(pc == etos_switcher)) {
764: flag = CALL_BRANCH;
765: } else if (unlikely(prev_pc == PC_UNDEFINED)) {
766: /* if first profiled instruction
767: * is subroutine call, it doesn't have
768: * valid prev_pc value stored
769: */
770: cpu_callinfo.return_pc = PC_UNDEFINED;
771: fprintf(stderr, "WARNING: previous PC from callinfo for 0x%d is undefined!\n", pc);
772: #if DEBUG
773: skip_assert = true;
774: DebugUI(REASON_CPU_EXCEPTION);
775: #endif
776: } else {
777: /* slow! */
778: cpu_callinfo.return_pc = Disasm_GetNextPC(prev_pc);
779: }
780: } else if (caller_pc != PC_UNDEFINED) {
1.1.1.2 ! root 781: /* returned from function to first instruction of another symbol:
1.1 root 782: * 0xf384 jsr some_function
783: * other_symbol:
784: * 0f3x8a some_instruction
785: * -> change return instruction address to
786: * address of what did the returned call.
787: */
788: prev_pc = caller_pc;
789: assert(is_prev_instr(prev_pc, pc));
790: flag = CALL_NEXT;
791: }
792: Profile_CallStart(idx, &cpu_callinfo, prev_pc, flag, pc, counters);
793: }
794: }
795:
796: /**
1.1.1.2 ! root 797: * log last loop info, if there's suitable data for one
! 798: */
! 799: static void log_last_loop(void)
! 800: {
! 801: unsigned len = cpu_profile.loop_end - cpu_profile.loop_start;
! 802: if (cpu_profile.loop_count > 1 && (len < profile_loop.cpu_limit || !profile_loop.cpu_limit)) {
! 803: fprintf(profile_loop.fp, "CPU %d 0x%06x %d %d\n", nVBLs,
! 804: cpu_profile.loop_start, len, cpu_profile.loop_count);
! 805: }
! 806: }
! 807:
! 808: /**
1.1 root 809: * Update CPU cycle and count statistics for PC address.
810: *
811: * This gets called after instruction has executed and PC
812: * has advanced to next instruction.
813: */
814: void Profile_CpuUpdate(void)
815: {
816: counters_t *counters = &(cpu_profile.all);
817: Uint32 pc, prev_pc, idx, cycles, misses;
818: cpu_profile_item_t *prev;
819:
820: prev_pc = cpu_profile.prev_pc;
821: /* PC may have extra bits, they need to be masked away as
822: * emulation itself does that too when PC value is used
823: */
824: cpu_profile.prev_pc = pc = M68000_GetPC() & 0xffffff;
825:
1.1.1.2 ! root 826: if (unlikely(profile_loop.fp)) {
! 827: if (pc < prev_pc) {
! 828: if (pc == cpu_profile.loop_start && prev_pc == cpu_profile.loop_end) {
! 829: cpu_profile.loop_count++;
! 830: } else {
! 831: cpu_profile.loop_start = pc;
! 832: cpu_profile.loop_end = prev_pc;
! 833: cpu_profile.loop_count = 1;
! 834: }
! 835: } else {
! 836: if (pc > cpu_profile.loop_end) {
! 837: log_last_loop();
! 838: cpu_profile.loop_end = 0xffffffff;
! 839: cpu_profile.loop_count = 0;
! 840: }
! 841: }
! 842: }
! 843:
1.1 root 844: idx = address2index(prev_pc);
845: assert(idx <= cpu_profile.size);
846: prev = cpu_profile.data + idx;
847:
848: if (likely(prev->count < MAX_CPU_PROFILE_VALUE)) {
849: prev->count++;
850: }
851:
852: #if USE_CYCLES_COUNTER
853: /* Confusingly, with DSP enabled, cycle counter is for this instruction,
854: * without DSP enabled, it's a monotonically increasing counter.
855: */
856: if (bDspEnabled) {
857: cycles = Cycles_GetCounter(CYCLES_COUNTER_CPU);
858: } else {
859: Uint32 newcycles = Cycles_GetCounter(CYCLES_COUNTER_CPU);
860: cycles = newcycles - cpu_profile.prev_cycles;
861: cpu_profile.prev_cycles = newcycles;
862: }
863: #else
864: cycles = CurrentInstrCycles + nWaitStateCycles;
865: #endif
866: /* cycles are based on 8Mhz clock, change them to correct one */
867: cycles <<= nCpuFreqShift;
868:
869: if (likely(prev->cycles < MAX_CPU_PROFILE_VALUE - cycles)) {
870: prev->cycles += cycles;
871: } else {
872: prev->cycles = MAX_CPU_PROFILE_VALUE;
873: }
874:
875: #if ENABLE_WINUAE_CPU
876: misses = CpuInstruction.iCacheMisses;
877: assert(misses < MAX_MISS);
878: cpu_profile.miss_counts[misses]++;
879: if (likely(prev->misses < MAX_CPU_PROFILE_VALUE - misses)) {
880: prev->misses += misses;
881: } else {
882: prev->misses = MAX_CPU_PROFILE_VALUE;
883: }
884: #else
885: misses = 0;
886: #endif
887: if (cpu_callinfo.sites) {
888: collect_calls(prev_pc, counters);
889: }
890: /* counters are increased after caller info is processed,
891: * otherwise cost for the instruction calling the callee
892: * doesn't get accounted to caller (but callee).
893: */
894: counters->misses += misses;
895: counters->cycles += cycles;
896: counters->count++;
897:
898: #if DEBUG
899: if (unlikely(OpcodeFamily == 0)) {
900: Uint32 nextpc;
901: fputs("WARNING: instruction opcode family is zero (=i_ILLG) for instruction:\n", stderr);
902: Disasm(stderr, prev_pc, &nextpc, 1);
903: }
904: /* catch too large (and negative) cycles for other than STOP instruction */
905: if (unlikely(cycles > 512 && OpcodeFamily != i_STOP)) {
906: Uint32 nextpc;
907: fprintf(stderr, "WARNING: cycles %d > 512:\n", cycles);
908: Disasm(stderr, prev_pc, &nextpc, 1);
909: }
910: if (unlikely(cycles == 0)) {
911: Uint32 nextpc;
912: fputs("WARNING: Zero cycles for an opcode:\n", stderr);
913: Disasm(stderr, prev_pc, &nextpc, 1);
914: }
915: #endif
916: }
917:
918:
919: /**
920: * Helper for accounting CPU profile area item.
921: */
922: static void update_area_item(profile_area_t *area, Uint32 addr, cpu_profile_item_t *item)
923: {
924: Uint32 cycles = item->cycles;
925: Uint32 count = item->count;
926:
927: if (!count) {
928: return;
929: }
930: area->counters.count += count;
931: area->counters.misses += item->misses;
932: area->counters.cycles += cycles;
933:
934: if (cycles == MAX_CPU_PROFILE_VALUE) {
935: area->overflow = true;
936: }
937: if (addr < area->lowest) {
938: area->lowest = addr;
939: }
940: area->highest = addr;
941:
942: area->active++;
943: }
944:
945: /**
946: * Helper for collecting CPU profile area statistics.
947: */
948: static Uint32 update_area(profile_area_t *area, Uint32 start, Uint32 end)
949: {
950: cpu_profile_item_t *item;
951: Uint32 addr;
952:
953: memset(area, 0, sizeof(profile_area_t));
954: area->lowest = cpu_profile.size;
955:
956: item = &(cpu_profile.data[start]);
957: for (addr = start; addr < end; addr++, item++) {
958: update_area_item(area, addr, item);
959: }
960: return addr;
961: }
962:
963: /**
964: * Helper for initializing CPU profile area sorting indexes.
965: */
966: static Uint32* index_area(profile_area_t *area, Uint32 *sort_arr)
967: {
968: cpu_profile_item_t *item;
969: Uint32 addr;
970:
971: item = &(cpu_profile.data[area->lowest]);
972: for (addr = area->lowest; addr <= area->highest; addr++, item++) {
973: if (item->count) {
974: *sort_arr++ = addr;
975: }
976: }
977: return sort_arr;
978: }
979:
980: /**
981: * Stop and process the CPU profiling data; collect stats and
982: * prepare for more optimal sorting.
983: */
984: void Profile_CpuStop(void)
985: {
986: Uint32 *sort_arr, next;
987: int active;
988:
989: if (cpu_profile.processed || !cpu_profile.enabled) {
990: return;
991: }
1.1.1.2 ! root 992:
! 993: log_last_loop();
! 994: if (profile_loop.fp) {
! 995: fflush(profile_loop.fp);
! 996: }
! 997:
1.1 root 998: /* user didn't change RAM or TOS size in the meanwhile? */
999: assert(cpu_profile.size == (STRamEnd + 0x20000 + TosSize) / 2);
1000:
1001: Profile_FinalizeCalls(&(cpu_callinfo), &(cpu_profile.all), Symbols_GetByCpuAddress);
1002:
1003: /* find lowest and highest addresses executed etc */
1004: next = update_area(&cpu_profile.ram, 0, STRamEnd/2);
1005: next = update_area(&cpu_profile.tos, next, (STRamEnd + TosSize)/2);
1006: next = update_area(&cpu_profile.rom, next, cpu_profile.size);
1007: assert(next == cpu_profile.size);
1008:
1009: #if DEBUG
1010: if (skip_assert) {
1011: skip_assert = false;
1012: } else
1013: #endif
1014: {
1015: assert(cpu_profile.all.misses == cpu_profile.ram.counters.misses + cpu_profile.tos.counters.misses + cpu_profile.rom.counters.misses);
1016: assert(cpu_profile.all.cycles == cpu_profile.ram.counters.cycles + cpu_profile.tos.counters.cycles + cpu_profile.rom.counters.cycles);
1017: assert(cpu_profile.all.count == cpu_profile.ram.counters.count + cpu_profile.tos.counters.count + cpu_profile.rom.counters.count);
1018: }
1019:
1020: /* allocate address array for sorting */
1021: active = cpu_profile.ram.active + cpu_profile.rom.active + cpu_profile.tos.active;
1022: sort_arr = calloc(active, sizeof(*sort_arr));
1023:
1024: if (!sort_arr) {
1025: perror("ERROR: allocating CPU profile address data");
1026: free(cpu_profile.data);
1027: cpu_profile.data = NULL;
1028: return;
1029: }
1030: printf("Allocated CPU profile address buffer (%d KB).\n",
1031: (int)sizeof(*sort_arr)*(active+512)/1024);
1032: cpu_profile.sort_arr = sort_arr;
1033: cpu_profile.active = active;
1034:
1035: /* and fill addresses for used instructions... */
1036: sort_arr = index_area(&cpu_profile.ram, sort_arr);
1037: sort_arr = index_area(&cpu_profile.tos, sort_arr);
1038: sort_arr = index_area(&cpu_profile.rom, sort_arr);
1039: assert(sort_arr == cpu_profile.sort_arr + cpu_profile.active);
1040: //printf("%d/%d/%d\n", area->active, sort_arr-cpu_profile.sort_arr, active);
1041:
1042: Profile_CpuShowStats();
1043: cpu_profile.processed = true;
1044: }
1045:
1046: /**
1047: * Get pointers to CPU profile enabling and disasm address variables
1048: * for updating them (in parser).
1049: */
1050: void Profile_CpuGetPointers(bool **enabled, Uint32 **disasm_addr)
1051: {
1052: *disasm_addr = &cpu_profile.disasm_addr;
1053: *enabled = &cpu_profile.enabled;
1054: }
1055:
1056: /**
1057: * Get callinfo & symbol search pointers for stack walking.
1058: */
1059: void Profile_CpuGetCallinfo(callinfo_t **callinfo, const char* (**get_symbol)(Uint32))
1060: {
1061: *callinfo = &(cpu_callinfo);
1062: *get_symbol = Symbols_GetByCpuAddress;
1063: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.