Annotation of hatari/src/debug/profilecpu.c, revision 1.1.1.5

1.1       root        1: /*
                      2:  * Hatari - profilecpu.c
                      3:  * 
1.1.1.3   root        4:  * Copyright (C) 2010-2015 by Eero Tamminen
1.1       root        5:  *
                      6:  * This file is distributed under the GNU General Public License, version 2
                      7:  * or at your option any later version. Read the file gpl.txt for details.
                      8:  *
                      9:  * profilecpu.c - functions for profiling CPU and showing the results.
                     10:  */
                     11: const char Profilecpu_fileid[] = "Hatari profilecpu.c : " __DATE__ " " __TIME__;
                     12: 
                     13: #include <stdio.h>
                     14: #include <inttypes.h>
                     15: #include <assert.h>
                     16: #include "main.h"
                     17: #include "configuration.h"
                     18: #include "clocks_timings.h"
                     19: #include "debugInfo.h"
                     20: #include "dsp.h"
                     21: #include "m68000.h"
                     22: #include "68kDisass.h"
1.1.1.5 ! root       23: #include "symbols.h"
1.1       root       24: #include "profile.h"
                     25: #include "profile_priv.h"
1.1.1.5 ! root       26: #include "debug_priv.h"
1.1       root       27: #include "stMemory.h"
                     28: #include "tos.h"
1.1.1.2   root       29: #include "screen.h"
                     30: #include "video.h"
                     31: 
                     32: 
                     33: /* cartridge area */
                     34: #define CART_START     0xFA0000
                     35: #define CART_END       0xFC0000
                     36: #define CART_SIZE      (CART_END - CART_START)
                     37: 
1.1.1.3   root       38: #define TTRAM_START    0x01000000
1.1       root       39: 
                     40: /* if non-zero, output (more) warnings on suspicious:
                     41:  * - cycle/instruction counts
                     42:  * - PC switches
1.1.1.3   root       43:  * And drop to debugger on invalid current & previous PC addresses.
                     44:  *
                     45:  * NOTE: DebugUI() calls that DEBUG define enables, can cause
                     46:  * instruction count mismatch assertions because debugger invocation
                     47:  * resets the counters AND happens in middle of data collection.
                     48:  * It's best to quit after debugging the issue ('q' command).
1.1       root       49:  */
                     50: #define DEBUG 0
                     51: #if DEBUG
                     52: #include "debugui.h"
                     53: static bool skip_assert;
                     54: #endif
                     55: 
1.1.1.5 ! root       56: /* whether to track & show all cache stats for all instructions */
        !            57: #define DEBUG_CACHE 0
        !            58: 
        !            59: 
1.1       root       60: static callinfo_t cpu_callinfo;
                     61: 
                     62: #define MAX_CPU_PROFILE_VALUE 0xFFFFFFFF
                     63: 
/* Profile counters collected for a single instruction address.
 * i_hits and d_misses exist only when DEBUG_CACHE is enabled.
 */
typedef struct {
	Uint32 count;   /* how many times the instruction at this address was executed */
	Uint32 cycles;  /* how many CPU cycles were spent at this address */
#if DEBUG_CACHE                  /* also track the less relevant cache events */
	Uint32 i_hits;    /* how many CPU i-cache hits happened at this address */
	Uint32 d_misses;  /* how many CPU d-cache misses happened at this address */
#endif
	Uint32 i_misses;  /* how many CPU i-cache misses happened at this address */
	Uint32 d_hits;    /* how many CPU d-cache hits happened at this address */
} cpu_profile_item_t;
                     74: 
1.1.1.5 ! root       75: 
        !            76: /* max count of hits/misses single instruction can trigger at once */
1.1.1.3   root       77: #define MAX_I_HITS   8
                     78: #define MAX_I_MISSES 8
                     79: #define MAX_D_HITS   32
                     80: #define MAX_D_MISSES 20
1.1       root       81: 
/* CPU profiler state: per-address data items, per-area summaries,
 * information about the previously executed instruction and
 * (with WinUAE CPU) cache event histograms.
 */
static struct {
	counters_t all;       /* total counts for all areas */
	cpu_profile_item_t *data; /* profile data items, indexed with address2index() */
	Uint32 size;          /* number of allocated profile data items */
	profile_area_t ttram; /* TT-RAM stats */
	profile_area_t ram;   /* normal RAM stats */
	profile_area_t rom;   /* cartridge ROM stats */
	profile_area_t tos;   /* ROM TOS stats */
	int active;           /* number of active data items in all areas */
	Uint32 *sort_arr;     /* data indexes used for sorting */
	int prev_family;      /* previous instruction opcode family */
	Uint64 prev_cycles;   /* previous instruction cycles counter */
	Uint32 prev_pc;       /* previous instruction address */
	Uint32 loop_start;    /* address of last loop start */
	Uint32 loop_end;      /* address of last loop end */
	Uint32 loop_count;    /* how many times it was looped */
	Uint32 disasm_addr;   /* 'addresses' command start address */
#if ENABLE_WINUAE_CPU
	Uint32 i_prefetched;  /* instructions that don't incur prefetch hit/miss */
	Uint32 i_hit_counts[MAX_I_HITS];    /* I-cache hit counts, array index = hits per instruction */
	Uint32 d_hit_counts[MAX_D_HITS];    /* D-cache hit counts, array index = hits per instruction */
	Uint32 i_miss_counts[MAX_I_MISSES]; /* I-cache miss counts, array index = misses per instruction */
	Uint32 d_miss_counts[MAX_D_MISSES]; /* D-cache miss counts, array index = misses per instruction */
#endif
	bool processed;       /* true when data is already processed */
	bool enabled;         /* true when profiling enabled */
} cpu_profile;
                    109: 
                    110: /* special hack for EmuTOS */
                    111: static Uint32 etos_switcher;
                    112: 
                    113: 
                    114: /* ------------------ CPU profile address mapping ----------------- */
                    115: 
                    116: /**
                    117:  * convert Atari memory address to sorting array profile data index.
                    118:  */
                    119: static inline Uint32 address2index(Uint32 pc)
                    120: {
                    121:        if (unlikely(pc & 1)) {
                    122:                fprintf(stderr, "WARNING: odd CPU profile instruction address 0x%x!\n", pc);
                    123: #if DEBUG
                    124:                skip_assert = true;
                    125:                DebugUI(REASON_CPU_EXCEPTION);
                    126: #endif
                    127:        }
1.1.1.2   root      128:        if (pc < STRamEnd) {
                    129:                /* most likely case, use RAM address as-is */
                    130: 
                    131:        } else if (pc >= TosAddress && pc < TosAddress + TosSize) {
1.1       root      132:                /* TOS, put it after RAM data */
                    133:                pc = pc - TosAddress + STRamEnd;
1.1.1.2   root      134:                if (TosAddress >= CART_END) {
                    135:                        /* and after cartridge data as it's higher */
                    136:                        pc += CART_SIZE;
                    137:                }
                    138:        } else if (pc >= CART_START && pc < CART_END) {
                    139:                /* ROM, put it after RAM data */
                    140:                pc = pc - CART_START + STRamEnd;
                    141:                if (TosAddress < CART_START) {
                    142:                        /* and after TOS as it's higher */
                    143:                        pc += TosSize;
                    144:                }
1.1.1.3   root      145: #if ENABLE_WINUAE_CPU
1.1.1.5 ! root      146:        } else if (TTmemory && pc >= TTRAM_START && pc < TTRAM_START + 1024*(unsigned)ConfigureParams.Memory.TTRamSize_KB) {
1.1.1.3   root      147:                pc += STRamEnd + TosSize + CART_SIZE - TTRAM_START;
                    148: #endif
1.1       root      149:        } else {
1.1.1.2   root      150:                fprintf(stderr, "WARNING: 'invalid' CPU PC profile instruction address 0x%x!\n", pc);
                    151:                /* extra entry at end is reserved for invalid PC values */
1.1.1.3   root      152:                pc = STRamEnd + TosSize + CART_SIZE;
1.1       root      153: #if DEBUG
1.1.1.2   root      154:                skip_assert = true;
                    155:                DebugUI(REASON_CPU_EXCEPTION);
1.1       root      156: #endif
                    157:        }
                    158:        /* CPU instructions are at even addresses, save space by halving */
                    159:        return (pc >> 1);
                    160: }
                    161: 
                    162: /**
                    163:  * convert sorting array profile data index to Atari memory address.
                    164:  */
                    165: static Uint32 index2address(Uint32 idx)
                    166: {
                    167:        idx <<= 1;
                    168:        /* RAM */
                    169:        if (idx < STRamEnd) {
                    170:                return idx;
                    171:        }
                    172:        idx -= STRamEnd;
1.1.1.2   root      173:        /* TOS before cartridge area? */
                    174:        if (TosAddress < CART_START) {
                    175:                /* TOS */
                    176:                if (idx < TosSize) {
                    177:                        return idx + TosAddress;
                    178:                }
                    179:                idx -= TosSize;
                    180:                /* ROM */
1.1.1.3   root      181:                if (idx < CART_SIZE) {
                    182:                        return idx + CART_START;
                    183:                }
                    184:                idx -= CART_SIZE;
1.1.1.2   root      185:        } else {
                    186:                /* ROM */
                    187:                if (idx < CART_SIZE) {
                    188:                        return idx + CART_START;
                    189:                }
                    190:                idx -= CART_SIZE;
                    191:                /* TOS */
1.1.1.3   root      192:                if (idx < TosSize) {
                    193:                        return idx + TosAddress;
                    194:                }
                    195:                idx -= TosSize;
1.1       root      196:        }
1.1.1.3   root      197:        return idx + TTRAM_START;
1.1       root      198: }
                    199: 
                    200: /* ------------------ CPU profile results ----------------- */
                    201: 
                    202: /**
1.1.1.5 ! root      203:  * Write string containing CPU cache stats, cycles, count, count percentage
        !           204:  * for given address to provided buffer.
        !           205:  *
1.1       root      206:  * Return true if data was available and non-zero, false otherwise.
                    207:  */
1.1.1.5 ! root      208: bool Profile_CpuAddressDataStr(char *buffer, size_t maxlen, Uint32 addr)
1.1       root      209: {
1.1.1.5 ! root      210:        cpu_profile_item_t *item;
        !           211:        float percentage;
1.1       root      212:        Uint32 idx;
1.1.1.5 ! root      213: 
        !           214:        assert(buffer && maxlen);
1.1       root      215:        if (!cpu_profile.data) {
                    216:                return false;
                    217:        }
                    218:        idx = address2index(addr);
1.1.1.5 ! root      219:        item = &(cpu_profile.data[idx]);
        !           220: 
1.1       root      221:        if (cpu_profile.all.count) {
1.1.1.5 ! root      222:                percentage = 100.0 * item->count / cpu_profile.all.count;
1.1       root      223:        } else {
1.1.1.5 ! root      224:                percentage = 0.0;
1.1       root      225:        }
1.1.1.5 ! root      226: #if DEBUG_CACHE
        !           227:        snprintf(buffer, maxlen, "%5.2f%% (%u, %u, %u, %u, %u, %u)",
        !           228:                 percentage, item->count, item->cycles,
        !           229:                 item->i_hits, item->i_misses,
        !           230:                 item->d_hits, item->d_misses);
        !           231: #else
        !           232:        snprintf(buffer, maxlen, "%5.2f%% (%u, %u, %u, %u)",
        !           233:                 percentage, item->count, item->cycles,
        !           234:                 item->i_misses, item->d_hits);
        !           235: #endif
        !           236:        return (item->count > 0);
1.1       root      237: }
                    238: 
                    239: /**
                    240:  * Helper to show statistics for specified CPU profile area.
                    241:  */
                    242: static void show_cpu_area_stats(profile_area_t *area)
                    243: {
                    244:        if (!area->active) {
                    245:                fprintf(stderr, "- no activity\n");
                    246:                return;
                    247:        }
                    248:        fprintf(stderr, "- active address range:\n  0x%06x-0x%06x\n",
                    249:                index2address(area->lowest),
                    250:                index2address(area->highest));
1.1.1.5 ! root      251:        fprintf(stderr, "- active instruction addresses:\n  %d (%.2f%% of all areas)\n",
1.1       root      252:                area->active,
                    253:                100.0 * area->active / cpu_profile.active);
1.1.1.5 ! root      254:        fprintf(stderr, "- executed instructions:\n  %"PRIu64" (%.2f%% of all areas)\n",
1.1       root      255:                area->counters.count,
                    256:                100.0 * area->counters.count / cpu_profile.all.count);
1.1.1.3   root      257:        /* CPU cache in use? */
                    258:        if (cpu_profile.all.i_misses) {
1.1.1.5 ! root      259:                fprintf(stderr, "- instruction cache misses:\n  %"PRIu64" (%.2f%% of all areas)\n",
1.1.1.3   root      260:                        area->counters.i_misses,
                    261:                        100.0 * area->counters.i_misses / cpu_profile.all.i_misses);
                    262:        }
                    263:        if (cpu_profile.all.d_hits) {
1.1.1.5 ! root      264:                fprintf(stderr, "- data cache hits:\n  %"PRIu64" (%.2f%% of all areas)\n",
1.1.1.3   root      265:                        area->counters.d_hits,
                    266:                        100.0 * area->counters.d_hits / cpu_profile.all.d_hits);
1.1       root      267:        }
1.1.1.5 ! root      268:        fprintf(stderr, "- used cycles:\n  %"PRIu64" (%.2f%% of all areas)\n  = %.5fs\n",
1.1       root      269:                area->counters.cycles,
                    270:                100.0 * area->counters.cycles / cpu_profile.all.cycles,
1.1.1.5 ! root      271:                (double)area->counters.cycles / MachineClocks.CPU_Freq_Emul);
1.1       root      272:        if (area->overflow) {
                    273:                fprintf(stderr, "  *** COUNTER OVERFLOW! ***\n");
                    274:        }
                    275: }
                    276: 
                    277: 
                    278: /**
                    279:  * show CPU area (RAM, ROM, TOS) specific statistics.
                    280:  */
                    281: void Profile_CpuShowStats(void)
                    282: {
                    283:        fprintf(stderr, "Normal RAM (0-0x%X):\n", STRamEnd);
                    284:        show_cpu_area_stats(&cpu_profile.ram);
                    285: 
                    286:        fprintf(stderr, "ROM TOS (0x%X-0x%X):\n", TosAddress, TosAddress + TosSize);
                    287:        show_cpu_area_stats(&cpu_profile.tos);
                    288: 
1.1.1.2   root      289:        fprintf(stderr, "Cartridge ROM (0x%X-%X):\n", CART_START, CART_END);
1.1       root      290:        show_cpu_area_stats(&cpu_profile.rom);
                    291: 
1.1.1.5 ! root      292:        if (TTmemory && ConfigureParams.Memory.TTRamSize_KB) {
        !           293:                fprintf(stderr, "TT-RAM (0x%X-%X):\n", TTRAM_START, TTRAM_START + 1024*ConfigureParams.Memory.TTRamSize_KB);
1.1.1.3   root      294:                show_cpu_area_stats(&cpu_profile.ttram);
                    295:        }
                    296: 
1.1       root      297:        fprintf(stderr, "\n= %.5fs\n",
1.1.1.5 ! root      298:                (double)cpu_profile.all.cycles / MachineClocks.CPU_Freq_Emul);
1.1.1.3   root      299: }
1.1       root      300: 
                    301: #if ENABLE_WINUAE_CPU
1.1.1.3   root      302: /**
                    303:  * show percentage histogram of given array items
                    304:  */
                    305: static void show_histogram(const char *title, int count, Uint32 *items)
                    306: {
1.1.1.5 ! root      307:        const Uint64 maxval = cpu_profile.all.count;
1.1.1.3   root      308:        Uint32 value;
                    309:        int i;
                    310: 
1.1.1.5 ! root      311:        fprintf(stderr, "\n%s, number of occurrences:\n", title);
1.1.1.3   root      312:        for (i = 0; i < count; i++) {
                    313:                value = items[i];
                    314:                if (value) {
                    315:                        int w, width = 50 * value / maxval+1;
                    316:                        fprintf(stderr, " %2d: ", i);
                    317:                        for (w = 0; w < width; w++) {
                    318:                                fputc('#', stderr);
                    319:                        }
                    320:                        fprintf(stderr, " %.3f%%\n", 100.0 * value / maxval);
1.1       root      321:                }
                    322:        }
                    323: }
                    324: 
                    325: /**
1.1.1.3   root      326:  * show CPU cache usage histograms
                    327:  */
                    328: void Profile_CpuShowCaches(void)
                    329: {
                    330:        if (!(cpu_profile.all.i_misses || cpu_profile.all.d_hits)) {
1.1.1.5 ! root      331:                fprintf(stderr, "No instruction/data cache information.\n");
1.1.1.3   root      332:                return;
                    333:        }
1.1.1.5 ! root      334:        fprintf(stderr,
        !           335:                "\nNote:\n"
        !           336:                "- these statistics include all profiled instructions, but\n"
        !           337:                "- instruction cache events happen only on prefetch/branch\n"
        !           338:                "- data cache events can happen only for instructions that do memory reads\n"
        !           339:                "\nAlready prefetched instructions: %.3f%% (no hits/misses)\n",
        !           340:                100.0 * cpu_profile.i_prefetched / cpu_profile.all.count);
        !           341: 
1.1.1.3   root      342:        show_histogram("Instruction cache hits per instruction",
1.1.1.4   root      343:                       ARRAY_SIZE(cpu_profile.i_hit_counts), cpu_profile.i_hit_counts);
1.1.1.3   root      344:        show_histogram("Instruction cache misses per instruction",
1.1.1.4   root      345:                       ARRAY_SIZE(cpu_profile.i_miss_counts), cpu_profile.i_miss_counts);
1.1.1.3   root      346:        show_histogram("Data cache hits per instruction",
1.1.1.4   root      347:                       ARRAY_SIZE(cpu_profile.d_hit_counts), cpu_profile.d_hit_counts);
1.1.1.3   root      348:        show_histogram("Data cache misses per instruction",
1.1.1.4   root      349:                       ARRAY_SIZE(cpu_profile.d_miss_counts), cpu_profile.d_miss_counts);
1.1.1.3   root      350: }
                    351: #else
                    352: void Profile_CpuShowCaches(void) {
                    353:        fprintf(stderr, "Cache information is recorded only with WinUAE CPU.\n");
                    354: }
                    355: #endif
                    356: 
                    357: /**
1.1       root      358:  * Show CPU instructions which execution was profiled, in the address order,
                    359:  * starting from the given address.  Return next disassembly address.
                    360:  */
1.1.1.5 ! root      361: Uint32 Profile_CpuShowAddresses(Uint32 lower, Uint32 upper, FILE *out, paging_t use_paging)
1.1       root      362: {
                    363:        int oldcols[DISASM_COLUMNS], newcols[DISASM_COLUMNS];
1.1.1.5 ! root      364:        int show, shown, addrs, active;
1.1       root      365:        const char *symbol;
                    366:        cpu_profile_item_t *data;
                    367:        Uint32 idx, end, size;
                    368:        uaecptr nextpc, addr;
                    369: 
                    370:        data = cpu_profile.data;
                    371:        if (!data) {
                    372:                fprintf(stderr, "ERROR: no CPU profiling data available!\n");
                    373:                return 0;
                    374:        }
                    375: 
                    376:        size = cpu_profile.size;
                    377:        active = cpu_profile.active;
                    378:        if (upper) {
                    379:                end = address2index(upper);
                    380:                show = active;
                    381:                if (end > size) {
                    382:                        end = size;
                    383:                }
                    384:        } else {
                    385:                end = size;
1.1.1.5 ! root      386:                show = DebugUI_GetPageLines(ConfigureParams.Debugger.nDisasmLines, 0);
1.1       root      387:                if (!show || show > active) {
                    388:                        show = active;
                    389:                }
                    390:        }
1.1.1.5 ! root      391:        if (use_paging == PAGING_DISABLED) {
        !           392:                show = INT_MAX;
        !           393:        }
1.1       root      394: 
                    395:        /* get/change columns */
                    396:        Disasm_GetColumns(oldcols);
                    397:        Disasm_DisableColumn(DISASM_COLUMN_HEXDUMP, oldcols, newcols);
                    398:        Disasm_SetColumns(newcols);
                    399: 
1.1.1.3   root      400:        fputs("# disassembly with profile data: <instructions percentage>% (<sum of instructions>, <sum of cycles>, <sum of i-cache misses>, <sum of d-cache hits>)\n", out);
1.1.1.5 ! root      401:        shown = 2; /* first and last printf */
1.1       root      402: 
1.1.1.5 ! root      403:        addrs = nextpc = 0;
1.1       root      404:        idx = address2index(lower);
1.1.1.5 ! root      405:        for (; shown < show && idx < end; idx++) {
1.1       root      406:                if (!data[idx].count) {
                    407:                        continue;
                    408:                }
                    409:                addr = index2address(idx);
                    410:                if (addr != nextpc && nextpc) {
                    411:                        fprintf(out, "[...]\n");
1.1.1.5 ! root      412:                        shown++;
1.1       root      413:                }
1.1.1.5 ! root      414:                symbol = Symbols_GetByCpuAddress(addr, SYMTYPE_TEXT);
1.1       root      415:                if (symbol) {
                    416:                        fprintf(out, "%s:\n", symbol);
1.1.1.5 ! root      417:                        shown++;
1.1       root      418:                }
                    419:                /* NOTE: column setup works only with 68kDisass disasm engine! */
                    420:                Disasm(out, addr, &nextpc, 1);
                    421:                shown++;
1.1.1.5 ! root      422:                addrs++;
1.1       root      423:        }
1.1.1.5 ! root      424:        printf("Disassembled %d (of active %d) CPU addresses.\n", addrs, active);
1.1       root      425: 
                    426:        /* restore disassembly columns */
                    427:        Disasm_SetColumns(oldcols);
                    428:        return nextpc;
                    429: }
                    430: 
                    431: /**
                    432:  * remove all disassembly columns except instruction ones.
                    433:  * data needed to restore columns is stored to "oldcols"
                    434:  */
                    435: static void leave_instruction_column(int *oldcols)
                    436: {
                    437:        int i, newcols[DISASM_COLUMNS];
                    438: 
                    439:        Disasm_GetColumns(oldcols);
                    440:        for (i = 0; i < DISASM_COLUMNS; i++) {
                    441:                if (i == DISASM_COLUMN_OPCODE || i == DISASM_COLUMN_OPERAND) {
                    442:                        continue;
                    443:                }
                    444:                Disasm_DisableColumn(i, oldcols, newcols);
                    445:                oldcols = newcols;
                    446:        }
                    447:        Disasm_SetColumns(newcols);
                    448: }
                    449: 
                    450: #if ENABLE_WINUAE_CPU
                    451: /**
                    452:  * compare function for qsort() to sort CPU profile data by instruction cache misses.
                    453:  */
1.1.1.3   root      454: static int cmp_cpu_i_misses(const void *p1, const void *p2)
1.1       root      455: {
1.1.1.3   root      456:        Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].i_misses;
                    457:        Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].i_misses;
1.1       root      458:        if (count1 > count2) {
                    459:                return -1;
                    460:        }
                    461:        if (count1 < count2) {
                    462:                return 1;
                    463:        }
                    464:        return 0;
                    465: }
                    466: 
                    467: /**
                    468:  * Sort CPU profile data addresses by instruction cache misses and show the results.
                    469:  */
1.1.1.3   root      470: void Profile_CpuShowInstrMisses(int show)
                    471: {
                    472:        int active;
                    473:        int oldcols[DISASM_COLUMNS];
                    474:        Uint32 *sort_arr, *end, addr, nextpc;
                    475:        cpu_profile_item_t *data = cpu_profile.data;
                    476:        float percentage;
                    477:        Uint32 count;
                    478: 
                    479:        if (!cpu_profile.all.i_misses) {
                    480:                fprintf(stderr, "No CPU instruction cache miss information available.\n");
                    481:                return;
                    482:        }
                    483: 
                    484:        active = cpu_profile.active;
                    485:        sort_arr = cpu_profile.sort_arr;
                    486:        qsort(sort_arr, active, sizeof(*sort_arr), cmp_cpu_i_misses);
                    487: 
                    488:        leave_instruction_column(oldcols);
                    489: 
                    490:        printf("addr:\t\ti-cache misses:\n");
                    491:        show = (show < active ? show : active);
                    492:        for (end = sort_arr + show; sort_arr < end; sort_arr++) {
                    493:                addr = index2address(*sort_arr);
                    494:                count = data[*sort_arr].i_misses;
                    495:                percentage = 100.0*count/cpu_profile.all.i_misses;
                    496:                printf("0x%06x\t%5.2f%%\t%d%s\t", addr, percentage, count,
                    497:                       count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
                    498:                Disasm(stdout, addr, &nextpc, 1);
                    499:        }
                    500:        printf("%d CPU addresses listed.\n", show);
                    501: 
                    502:        Disasm_SetColumns(oldcols);
                    503: }
                    504: 
                    505: /**
                    506:  * compare function for qsort() to sort CPU profile data by data cache hits.
                    507:  */
                    508: static int cmp_cpu_d_hits(const void *p1, const void *p2)
                    509: {
                    510:        Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].d_hits;
                    511:        Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].d_hits;
                    512:        if (count1 > count2) {
                    513:                return -1;
                    514:        }
                    515:        if (count1 < count2) {
                    516:                return 1;
                    517:        }
                    518:        return 0;
                    519: }
                    520: 
                    521: /**
                    522:  * Sort CPU profile data addresses by data cache hits and show the results.
                    523:  */
                    524: void Profile_CpuShowDataHits(int show)
1.1       root      525: {
                    526:        int active;
                    527:        int oldcols[DISASM_COLUMNS];
                    528:        Uint32 *sort_arr, *end, addr, nextpc;
                    529:        cpu_profile_item_t *data = cpu_profile.data;
                    530:        float percentage;
                    531:        Uint32 count;
                    532: 
1.1.1.3   root      533:        if (!cpu_profile.all.d_hits) {
                    534:                fprintf(stderr, "No CPU data cache hit information available.\n");
1.1       root      535:                return;
                    536:        }
                    537: 
                    538:        active = cpu_profile.active;
                    539:        sort_arr = cpu_profile.sort_arr;
1.1.1.3   root      540:        qsort(sort_arr, active, sizeof(*sort_arr), cmp_cpu_d_hits);
1.1       root      541: 
                    542:        leave_instruction_column(oldcols);
                    543: 
1.1.1.3   root      544:        printf("addr:\t\td-cache hits:\n");
1.1       root      545:        show = (show < active ? show : active);
                    546:        for (end = sort_arr + show; sort_arr < end; sort_arr++) {
                    547:                addr = index2address(*sort_arr);
1.1.1.3   root      548:                count = data[*sort_arr].d_hits;
                    549:                percentage = 100.0*count/cpu_profile.all.d_hits;
1.1       root      550:                printf("0x%06x\t%5.2f%%\t%d%s\t", addr, percentage, count,
                    551:                       count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
                    552:                Disasm(stdout, addr, &nextpc, 1);
                    553:        }
                    554:        printf("%d CPU addresses listed.\n", show);
                    555: 
                    556:        Disasm_SetColumns(oldcols);
                    557: }
1.1.1.3   root      558: 
1.1       root      559: #else
/**
 * Fallback variant from the #else branch: only tells that cache
 * information needs the WinUAE CPU core, 'show' is not used.
 */
void Profile_CpuShowInstrMisses(int show)
{
	(void)show;
	fputs("Cache information is recorded only with WinUAE CPU.\n", stderr);
}
                    563: void Profile_CpuShowDataHits(int show) {
                    564:        fprintf(stderr, "Cache information is recorded only with WinUAE CPU.\n");
1.1       root      565: }
                    566: #endif
                    567: 
                    568: 
                    569: /**
                    570:  * compare function for qsort() to sort CPU profile data by cycles counts.
                    571:  */
                    572: static int cmp_cpu_cycles(const void *p1, const void *p2)
                    573: {
                    574:        Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].cycles;
                    575:        Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].cycles;
                    576:        if (count1 > count2) {
                    577:                return -1;
                    578:        }
                    579:        if (count1 < count2) {
                    580:                return 1;
                    581:        }
                    582:        return 0;
                    583: }
                    584: 
                    585: /**
                    586:  * Sort CPU profile data addresses by cycle counts and show the results.
                    587:  */
                    588: void Profile_CpuShowCycles(int show)
                    589: {
                    590:        int active;
                    591:        int oldcols[DISASM_COLUMNS];
                    592:        Uint32 *sort_arr, *end, addr, nextpc;
                    593:        cpu_profile_item_t *data = cpu_profile.data;
                    594:        float percentage;
                    595:        Uint32 count;
                    596: 
                    597:        if (!data) {
                    598:                fprintf(stderr, "ERROR: no CPU profiling data available!\n");
                    599:                return;
                    600:        }
                    601: 
                    602:        active = cpu_profile.active;
                    603:        sort_arr = cpu_profile.sort_arr;
                    604:        qsort(sort_arr, active, sizeof(*sort_arr), cmp_cpu_cycles);
                    605: 
                    606:        leave_instruction_column(oldcols);
                    607: 
                    608:        printf("addr:\t\tcycles:\n");
                    609:        show = (show < active ? show : active);
                    610:        for (end = sort_arr + show; sort_arr < end; sort_arr++) {
                    611:                addr = index2address(*sort_arr);
                    612:                count = data[*sort_arr].cycles;
                    613:                percentage = 100.0*count/cpu_profile.all.cycles;
                    614:                printf("0x%06x\t%5.2f%%\t%d%s\t", addr, percentage, count,
                    615:                       count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
                    616:                Disasm(stdout, addr, &nextpc, 1);
                    617:        }
                    618:        printf("%d CPU addresses listed.\n", show);
                    619: 
                    620:        Disasm_SetColumns(oldcols);
                    621: }
                    622: 
                    623: /**
                    624:  * compare function for qsort() to sort CPU profile data by descending
                    625:  * address access counts.
                    626:  */
                    627: static int cmp_cpu_count(const void *p1, const void *p2)
                    628: {
                    629:        Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].count;
                    630:        Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].count;
                    631:        if (count1 > count2) {
                    632:                return -1;
                    633:        }
                    634:        if (count1 < count2) {
                    635:                return 1;
                    636:        }
                    637:        return 0;
                    638: }
                    639: 
                    640: /**
                    641:  * Sort CPU profile data addresses by call counts and show the results.
                    642:  * If symbols are requested and symbols are loaded, show (only) addresses
                    643:  * matching a symbol.
                    644:  */
                    645: void Profile_CpuShowCounts(int show, bool only_symbols)
                    646: {
                    647:        cpu_profile_item_t *data = cpu_profile.data;
                    648:        int symbols, matched, active;
                    649:        int oldcols[DISASM_COLUMNS];
                    650:        Uint32 *sort_arr, *end, addr, nextpc;
                    651:        const char *name;
                    652:        float percentage;
                    653:        Uint32 count;
                    654: 
                    655:        if (!data) {
                    656:                fprintf(stderr, "ERROR: no CPU profiling data available!\n");
                    657:                return;
                    658:        }
                    659:        active = cpu_profile.active;
                    660:        show = (show < active ? show : active);
                    661: 
                    662:        sort_arr = cpu_profile.sort_arr;
                    663:        qsort(sort_arr, active, sizeof(*sort_arr), cmp_cpu_count);
                    664: 
                    665:        if (!only_symbols) {
                    666:                leave_instruction_column(oldcols);
                    667:                printf("addr:\t\tcount:\n");
                    668:                for (end = sort_arr + show; sort_arr < end; sort_arr++) {
                    669:                        addr = index2address(*sort_arr);
                    670:                        count = data[*sort_arr].count;
                    671:                        percentage = 100.0*count/cpu_profile.all.count;
                    672:                        printf("0x%06x\t%5.2f%%\t%d%s\t",
                    673:                               addr, percentage, count,
                    674:                               count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
                    675:                        Disasm(stdout, addr, &nextpc, 1);
                    676:                }
                    677:                printf("%d CPU addresses listed.\n", show);
                    678:                Disasm_SetColumns(oldcols);
                    679:                return;
                    680:        }
                    681: 
1.1.1.5 ! root      682:        symbols = Symbols_CpuCodeCount();
1.1       root      683:        if (!symbols) {
                    684:                fprintf(stderr, "ERROR: no CPU symbols loaded!\n");
                    685:                return;
                    686:        }
                    687:        matched = 0;    
                    688: 
                    689:        leave_instruction_column(oldcols);
                    690: 
                    691:        printf("addr:\t\tcount:\t\tsymbol:\n");
                    692:        for (end = sort_arr + active; sort_arr < end; sort_arr++) {
                    693: 
                    694:                addr = index2address(*sort_arr);
1.1.1.5 ! root      695:                name = Symbols_GetByCpuAddress(addr, SYMTYPE_TEXT);
1.1       root      696:                if (!name) {
                    697:                        continue;
                    698:                }
                    699:                count = data[*sort_arr].count;
                    700:                percentage = 100.0*count/cpu_profile.all.count;
                    701:                printf("0x%06x\t%5.2f%%\t%d\t%s%s\t",
                    702:                       addr, percentage, count, name,
                    703:                       count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
                    704:                Disasm(stdout, addr, &nextpc, 1);
                    705: 
                    706:                matched++;
                    707:                if (matched >= show || matched >= symbols) {
                    708:                        break;
                    709:                }
                    710:        }
                    711:        printf("%d CPU symbols listed.\n", matched);
                    712: 
                    713:        Disasm_SetColumns(oldcols);
                    714: }
                    715: 
                    716: 
                    717: static const char * addr2name(Uint32 addr, Uint64 *total)
                    718: {
                    719:        Uint32 idx = address2index(addr);
                    720:        *total = cpu_profile.data[idx].count;
1.1.1.5 ! root      721:        return Symbols_GetByCpuAddress(addr, SYMTYPE_TEXT);
1.1       root      722: }
                    723: 
                    724: /**
                    725:  * Output CPU callers info to given file.
                    726:  */
                    727: void Profile_CpuShowCallers(FILE *fp)
                    728: {
                    729:        Profile_ShowCallers(fp, cpu_callinfo.sites, cpu_callinfo.site, addr2name);
                    730: }
                    731: 
                    732: /**
                    733:  * Save CPU profile information to given file.
                    734:  */
                    735: void Profile_CpuSave(FILE *out)
                    736: {
1.1.1.3   root      737:        Uint32 text, end;
                    738:        fputs("Field names:\tExecuted instructions, Used cycles, Instruction cache misses, Data cache hits\n", out);
                    739:        /* (Python) regexp that matches address and all described fields from disassembly:
                    740:         * $<hex>  :  <ASM>  <percentage>% (<count>, <cycles>, <i-misses>, <d-hits>)
                    741:         * $e5af38 :   rts           0.00% (12, 0, 12, 0)
1.1       root      742:         */
                    743:        fputs("Field regexp:\t^\\$([0-9a-f]+) :.*% \\((.*)\\)$\n", out);
                    744:        /* some information for interpreting the addresses */
1.1.1.3   root      745:        fprintf(out, "ST_RAM:\t\t0x%06x-0x%06x\n", 0, STRamEnd);
                    746:        end = TosAddress + TosSize;
                    747:        fprintf(out, "ROM_TOS:\t0x%06x-0x%06x\n", TosAddress, end);
                    748:        fprintf(out, "CARTRIDGE:\t0x%06x-0x%06x\n", CART_START, CART_END);
1.1       root      749:        text = DebugInfo_GetTEXT();
1.1.1.3   root      750:        if (text && (text < TosAddress || text >= TTRAM_START)) {
1.1       root      751:                fprintf(out, "PROGRAM_TEXT:\t0x%06x-0x%06x\n", text, DebugInfo_GetTEXTEnd());
                    752:        }
1.1.1.5 ! root      753:        if (TTmemory && ConfigureParams.Memory.TTRamSize_KB) {
        !           754:                end = TTRAM_START + 1024*ConfigureParams.Memory.TTRamSize_KB;
1.1.1.3   root      755:                fprintf(out, "TT_RAM:\t\t0x%08x-0x%08x\n", TTRAM_START, end);
                    756:        } else if (end < CART_END) {
                    757:                end = CART_END;
                    758:        }
1.1.1.5 ! root      759:        Profile_CpuShowAddresses(0, end-2, out, PAGING_DISABLED);
1.1       root      760:        Profile_CpuShowCallers(out);
                    761: }
                    762: 
                    763: /* ------------------ CPU profile control ----------------- */
                    764: 
                    765: /**
                    766:  * Initialize CPU profiling when necessary.  Return true if profiling.
                    767:  */
                    768: bool Profile_CpuStart(void)
                    769: {
                    770:        int size;
                    771: 
                    772:        Profile_FreeCallinfo(&(cpu_callinfo));
                    773:        if (cpu_profile.sort_arr) {
                    774:                /* remove previous results */
                    775:                free(cpu_profile.sort_arr);
                    776:                free(cpu_profile.data);
                    777:                cpu_profile.sort_arr = NULL;
                    778:                cpu_profile.data = NULL;
                    779:                printf("Freed previous CPU profile buffers.\n");
                    780:        }
                    781:        if (!cpu_profile.enabled) {
                    782:                return false;
                    783:        }
                    784:        /* zero everything */
                    785:        memset(&cpu_profile, 0, sizeof(cpu_profile));
                    786: 
                    787:        /* Shouldn't change within same debug session */
1.1.1.3   root      788:        size = (STRamEnd + CART_SIZE + TosSize) / 2;
1.1.1.5 ! root      789:        if (TTmemory && ConfigureParams.Memory.TTRamSize_KB) {
        !           790:                size += ConfigureParams.Memory.TTRamSize_KB * 1024/2;
1.1.1.3   root      791:        }
1.1       root      792: 
                    793:        /* Add one entry for catching invalid PC values */
                    794:        cpu_profile.data = calloc(size + 1, sizeof(*cpu_profile.data));
                    795:        if (!cpu_profile.data) {
                    796:                perror("ERROR, new CPU profile buffer alloc failed");
                    797:                return false;
                    798:        }
                    799:        printf("Allocated CPU profile buffer (%d MB).\n",
                    800:               (int)sizeof(*cpu_profile.data)*size/(1024*1024));
                    801:        cpu_profile.size = size;
                    802: 
1.1.1.5 ! root      803:        Profile_AllocCallinfo(&(cpu_callinfo), Symbols_CpuCodeCount(), "CPU");
1.1       root      804: 
                    805:        /* special hack for EmuTOS */
                    806:        etos_switcher = PC_UNDEFINED;
                    807:        if (cpu_callinfo.sites && bIsEmuTOS &&
                    808:            (!Symbols_GetCpuAddress(SYMTYPE_TEXT, "_switchto", &etos_switcher) || etos_switcher < TosAddress)) {
                    809:                etos_switcher = PC_UNDEFINED;
                    810:        }
                    811: 
1.1.1.3   root      812:        /* reset cache stats (CPU emulation doesn't do that) */
                    813:        CpuInstruction.D_Cache_hit = 0;
                    814:        CpuInstruction.I_Cache_hit = 0;
                    815:        CpuInstruction.I_Cache_miss = 0;
                    816:        CpuInstruction.D_Cache_miss = 0;
1.1       root      817: 
1.1.1.3   root      818:        cpu_profile.prev_cycles = CyclesGlobalClockCounter;
                    819:        cpu_profile.prev_family = OpcodeFamily;
                    820:        cpu_profile.prev_pc = M68000_GetPC();
                    821:        if (ConfigureParams.System.bAddressSpace24) {
                    822:                cpu_profile.prev_pc &= 0xffffff;
                    823:        }
1.1.1.2   root      824:        cpu_profile.loop_start = PC_UNDEFINED;
                    825:        cpu_profile.loop_end = PC_UNDEFINED;
                    826:        cpu_profile.loop_count = 0;
                    827:        Profile_LoopReset();
                    828: 
1.1       root      829:        cpu_profile.disasm_addr = 0;
                    830:        cpu_profile.processed = false;
                    831:        cpu_profile.enabled = true;
                    832:        return cpu_profile.enabled;
                    833: }
                    834: 
                    835: /**
                    836:  * return true if pc could be next instruction for previous pc
                    837:  */
                    838: static bool is_prev_instr(Uint32 prev_pc, Uint32 pc)
                    839: {
                    840:        /* just moved to next instruction (1-2 words)? */
                    841:        if (prev_pc < pc && (pc - prev_pc) <= 10) {
                    842:                return true;
                    843:        }
                    844:        return false;
                    845: }
                    846: 
                    847: /**
                    848:  * return caller instruction type classification
                    849:  */
                    850: static calltype_t cpu_opcode_type(int family, Uint32 prev_pc, Uint32 pc)
                    851: {
                    852:        switch (family) {
                    853: 
                    854:        case i_JSR:
                    855:        case i_BSR:
                    856:                return CALL_SUBROUTINE;
                    857: 
                    858:        case i_RTS:
                    859:        case i_RTR:
                    860:        case i_RTD:
                    861:                return CALL_SUBRETURN;
                    862: 
                    863:        case i_JMP:     /* often used also for "inlined" function calls... */
                    864:        case i_Bcc:     /* both BRA & BCC */
                    865:        case i_FBcc:
                    866:        case i_DBcc:
                    867:        case i_FDBcc:
                    868:                return CALL_BRANCH;
                    869: 
                    870:        case i_TRAP:
                    871:        case i_TRAPV:
                    872:        case i_TRAPcc:
                    873:        case i_FTRAPcc:
                    874:        case i_STOP:
                    875:        case i_ILLG:
                    876:        case i_CHK:
                    877:        case i_CHK2:
                    878:        case i_BKPT:
                    879:                return CALL_EXCEPTION;
                    880: 
                    881:        case i_RTE:
                    882:                return CALL_EXCRETURN;
                    883:        }
                    884:        /* just moved to next instruction? */
                    885:        if (is_prev_instr(prev_pc, pc)) {
                    886:                return CALL_NEXT;
                    887:        }
                    888:        return CALL_UNKNOWN;
                    889: }
                    890: 
                    891: /**
                    892:  * If call tracking is enabled (there are symbols), collect
                    893:  * information about subroutine and other calls, and their costs.
                    894:  * 
                    895:  * Like with profile data, caller info checks need to be for previous
                    896:  * instruction, that's why "pc" argument for this function actually
                    897:  * needs to be previous PC.
                    898:  */
                    899: static void collect_calls(Uint32 pc, counters_t *counters)
                    900: {
                    901:        calltype_t flag;
                    902:        int idx, family;
                    903:        Uint32 prev_pc, caller_pc;
                    904: 
                    905:        family = cpu_profile.prev_family;
                    906:        cpu_profile.prev_family = OpcodeFamily;
                    907: 
                    908:        prev_pc = cpu_callinfo.prev_pc;
                    909:        cpu_callinfo.prev_pc = pc;
                    910:        caller_pc = PC_UNDEFINED;
                    911: 
                    912:        /* address is return address for last subroutine call? */
                    913:        if (unlikely(pc == cpu_callinfo.return_pc) && likely(cpu_callinfo.depth)) {
                    914: 
                    915:                flag = cpu_opcode_type(family, prev_pc, pc);
1.1.1.2   root      916:                /* previous address can be exception return (e.g. RTE) instead of RTS,
                    917:                 * if exception occurred right after returning from subroutine call.
1.1       root      918:                 */
                    919:                if (likely(flag == CALL_SUBRETURN || flag == CALL_EXCRETURN)) {
                    920:                        caller_pc = Profile_CallEnd(&cpu_callinfo, counters);
                    921:                } else {
                    922: #if DEBUG
                    923:                        /* although at return address, it didn't return yet,
                    924:                         * e.g. because there was a jsr or jump to return address
                    925:                         */
                    926:                        Uint32 nextpc;
1.1.1.2   root      927:                        fprintf(stderr, "WARNING: subroutine call returned 0x%x -> 0x%x, not through RTS!\n", prev_pc, pc);
1.1       root      928:                        Disasm(stderr, prev_pc, &nextpc, 1);
                    929: #endif
                    930:                }
1.1.1.2   root      931:                /* next address might be another symbol, so need to fall through */
1.1       root      932:        }
                    933: 
                    934:        /* address is one which we're tracking? */
1.1.1.5 ! root      935:        idx = Symbols_GetCpuCodeIndex(pc);
1.1       root      936:        if (unlikely(idx >= 0)) {
                    937: 
                    938:                flag = cpu_opcode_type(family, prev_pc, pc);
1.1.1.2   root      939:                if (flag == CALL_SUBROUTINE || flag == CALL_EXCEPTION) {
1.1       root      940:                        /* special HACK for for EmuTOS AES switcher which
                    941:                         * changes stack content to remove itself from call
                    942:                         * stack and uses RTS for subroutine *calls*, not
                    943:                         * for returning from them.
                    944:                         *
                    945:                         * It wouldn't be reliable to detect calls from it,
                    946:                         * so I'm making call *to* it show up as branch, to
                    947:                         * keep callstack depth correct.
                    948:                         */
                    949:                        if (unlikely(pc == etos_switcher)) {
                    950:                                flag = CALL_BRANCH;
                    951:                        } else if (unlikely(prev_pc == PC_UNDEFINED)) {
                    952:                                /* if first profiled instruction
                    953:                                 * is subroutine call, it doesn't have
                    954:                                 * valid prev_pc value stored
                    955:                                 */
                    956:                                cpu_callinfo.return_pc = PC_UNDEFINED;
1.1.1.3   root      957:                                fprintf(stderr, "WARNING: previous PC for tracked address 0x%d is undefined!\n", pc);
1.1       root      958: #if DEBUG
                    959:                                skip_assert = true;
                    960:                                DebugUI(REASON_CPU_EXCEPTION);
                    961: #endif
                    962:                        } else {
                    963:                                /* slow! */
                    964:                                cpu_callinfo.return_pc = Disasm_GetNextPC(prev_pc);
                    965:                        }
                    966:                } else if (caller_pc != PC_UNDEFINED) {
1.1.1.2   root      967:                        /* returned from function to first instruction of another symbol:
1.1       root      968:                         *      0xf384  jsr some_function
                    969:                         *      other_symbol:
                    970:                         *      0f3x8a  some_instruction
                    971:                         * -> change return instruction address to
                    972:                         *    address of what did the returned call.
                    973:                         */
                    974:                        prev_pc = caller_pc;
                    975:                        assert(is_prev_instr(prev_pc, pc));
                    976:                        flag = CALL_NEXT;
                    977:                }
                    978:                Profile_CallStart(idx, &cpu_callinfo, prev_pc, flag, pc, counters);
                    979:        }
                    980: }
                    981: 
                    982: /**
1.1.1.2   root      983:  * log last loop info, if there's suitable data for one
                    984:  */
                    985: static void log_last_loop(void)
                    986: {
                    987:        unsigned len = cpu_profile.loop_end - cpu_profile.loop_start;
                    988:        if (cpu_profile.loop_count > 1 && (len < profile_loop.cpu_limit || !profile_loop.cpu_limit)) {
                    989:                fprintf(profile_loop.fp, "CPU %d 0x%06x %d %d\n", nVBLs,
                    990:                        cpu_profile.loop_start, len, cpu_profile.loop_count);
                    991:        }
                    992: }
                    993: 
1.1.1.3   root      994: # if DEBUG || ENABLE_WINUAE_CPU
                    995: /**
                    996:  * Warning for values going out of expected range
                    997:  */
                    998: static Uint32 warn_too_large(const char *name, const int value, const int limit, const Uint32 prev_pc, const Uint32 pc)
                    999: {
                   1000:        Uint32 nextpc;
                   1001:        fprintf(stderr, "WARNING: unexpected (%d > %d) %s at 0x%x:\n", value, limit - 1, name, pc);
                   1002:        Disasm(stderr, prev_pc, &nextpc, 1);
                   1003:        Disasm(stderr, pc, &nextpc, 1);
                   1004: #if DEBUG
                   1005:        skip_assert = true;
                   1006:        DebugUI(REASON_CPU_EXCEPTION);
                   1007: #endif
                   1008:        return limit - 1;
                   1009: }
                   1010: #endif
                   1011: 
1.1.1.2   root     1012: /**
1.1       root     1013:  * Update CPU cycle and count statistics for PC address.
                   1014:  *
                   1015:  * This gets called after instruction has executed and PC
                   1016:  * has advanced to next instruction.
                   1017:  */
                   1018: void Profile_CpuUpdate(void)
                   1019: {
                   1020:        counters_t *counters = &(cpu_profile.all);
1.1.1.3   root     1021:        Uint32 pc, prev_pc, idx, cycles;
1.1       root     1022:        cpu_profile_item_t *prev;
1.1.1.3   root     1023: #if ENABLE_WINUAE_CPU
                   1024:        Uint32 i_hits, d_hits, i_misses, d_misses;
                   1025: #else
                   1026:        const Uint32 i_misses = 0, d_hits = 0;
                   1027: #endif
1.1       root     1028: 
                   1029:        prev_pc = cpu_profile.prev_pc;
1.1.1.3   root     1030:        /* PC may have extra bits when using 24 bit addressing, they need to be masked away as
1.1       root     1031:         * emulation itself does that too when PC value is used
                   1032:         */
1.1.1.3   root     1033:        cpu_profile.prev_pc = pc = M68000_GetPC();
                   1034:        if (ConfigureParams.System.bAddressSpace24) {
                   1035:                cpu_profile.prev_pc &= 0xffffff;
                   1036:        }
1.1.1.2   root     1037:        if (unlikely(profile_loop.fp)) {
                   1038:                if (pc < prev_pc) {
                   1039:                        if (pc == cpu_profile.loop_start && prev_pc == cpu_profile.loop_end) {
                   1040:                                cpu_profile.loop_count++;
                   1041:                        } else {
                   1042:                                cpu_profile.loop_start = pc;
                   1043:                                cpu_profile.loop_end = prev_pc;
                   1044:                                cpu_profile.loop_count = 1;
                   1045:                        }
                   1046:                } else {
                   1047:                        if (pc > cpu_profile.loop_end) {
                   1048:                                log_last_loop();
1.1.1.3   root     1049:                                cpu_profile.loop_end = 0xffffffff;
1.1.1.2   root     1050:                                cpu_profile.loop_count = 0;
                   1051:                        }
                   1052:                }
                   1053:        }
                   1054: 
1.1       root     1055:        idx = address2index(prev_pc);
                   1056:        assert(idx <= cpu_profile.size);
                   1057:        prev = cpu_profile.data + idx;
                   1058: 
                   1059:        if (likely(prev->count < MAX_CPU_PROFILE_VALUE)) {
                   1060:                prev->count++;
                   1061:        }
                   1062: 
1.1.1.3   root     1063:        cycles = CyclesGlobalClockCounter - cpu_profile.prev_cycles;
                   1064:        cpu_profile.prev_cycles = CyclesGlobalClockCounter;
1.1       root     1065: 
                   1066:        if (likely(prev->cycles < MAX_CPU_PROFILE_VALUE - cycles)) {
                   1067:                prev->cycles += cycles;
                   1068:        } else {
                   1069:                prev->cycles = MAX_CPU_PROFILE_VALUE;
                   1070:        }
                   1071: 
                   1072: #if ENABLE_WINUAE_CPU
1.1.1.3   root     1073:        /* only WinUAE CPU core provides cache information */
                   1074:        i_hits = CpuInstruction.I_Cache_hit;
                   1075:        d_hits = CpuInstruction.D_Cache_hit;
                   1076:        i_misses = CpuInstruction.I_Cache_miss;
                   1077:        d_misses = CpuInstruction.D_Cache_miss;
                   1078: 
                   1079:        /* reset cache stats after reading them (for the next instruction) */
                   1080:        CpuInstruction.I_Cache_hit = 0;
                   1081:        CpuInstruction.D_Cache_hit = 0;
                   1082:        CpuInstruction.I_Cache_miss = 0;
                   1083:        CpuInstruction.D_Cache_miss = 0;
                   1084: 
                   1085:        /* tracked for every address */
1.1.1.5 ! root     1086: # if DEBUG_CACHE
        !          1087:        if (likely(prev->i_hits < MAX_CPU_PROFILE_VALUE - i_hits)) {
        !          1088:                prev->i_hits += i_hits;
        !          1089:        } else {
        !          1090:                prev->i_hits = MAX_CPU_PROFILE_VALUE;
        !          1091:        }
        !          1092:        if (likely(prev->d_misses < MAX_CPU_PROFILE_VALUE - d_misses)) {
        !          1093:                prev->d_misses += d_misses;
        !          1094:        } else {
        !          1095:                prev->d_misses = MAX_CPU_PROFILE_VALUE;
        !          1096:        }
        !          1097: # endif
1.1.1.3   root     1098:        if (likely(prev->i_misses < MAX_CPU_PROFILE_VALUE - i_misses)) {
                   1099:                prev->i_misses += i_misses;
1.1       root     1100:        } else {
1.1.1.3   root     1101:                prev->i_misses = MAX_CPU_PROFILE_VALUE;
1.1       root     1102:        }
1.1.1.3   root     1103:        if (likely(prev->d_hits < MAX_CPU_PROFILE_VALUE - d_hits)) {
                   1104:                prev->d_hits += d_hits;
                   1105:        } else {
                   1106:                prev->d_hits = MAX_CPU_PROFILE_VALUE;
                   1107:        }
                   1108: 
                   1109:        /* tracking for histogram, check for array overflows */
1.1.1.5 ! root     1110:        if (!(i_hits || i_misses)) {
        !          1111:                cpu_profile.i_prefetched++;
        !          1112:        }
1.1.1.3   root     1113:        if (unlikely(i_hits >= MAX_I_HITS)) {
                   1114:                i_hits = warn_too_large("number of CPU instruction cache hits", i_hits, MAX_I_HITS, prev_pc, pc);
                   1115:        }
                   1116:        cpu_profile.i_hit_counts[i_hits]++;
                   1117: 
                   1118:        if (unlikely(i_misses >= MAX_I_MISSES)) {
                   1119:                i_misses = warn_too_large("number of CPU instruction cache misses", i_misses, MAX_I_MISSES, prev_pc, pc);
                   1120:        }
                   1121:        cpu_profile.i_miss_counts[i_misses]++;
                   1122: 
                   1123:        if (unlikely(d_hits >= MAX_D_HITS)) {
                   1124:                d_hits = warn_too_large("number of CPU data cache hits", d_hits, MAX_D_HITS, prev_pc, pc);
                   1125:        }
                   1126:        cpu_profile.d_hit_counts[d_hits]++;
                   1127: 
                   1128:        if (unlikely(d_misses >= MAX_D_MISSES)) {
                   1129:                d_misses = warn_too_large("number of CPU data cache misses", d_misses, MAX_D_MISSES, prev_pc, pc);
                   1130:        }
                   1131:        cpu_profile.d_miss_counts[d_misses]++;
1.1.1.5 ! root     1132: #endif   /* ENABLE_WINUAE_CPU */
1.1.1.3   root     1133: 
1.1       root     1134:        if (cpu_callinfo.sites) {
                   1135:                collect_calls(prev_pc, counters);
                   1136:        }
1.1.1.5 ! root     1137:        /* total counters are increased after caller info is processed,
1.1       root     1138:         * otherwise cost for the instruction calling the callee
                   1139:         * doesn't get accounted to caller (but callee).
                   1140:         */
                   1141:        counters->count++;
1.1.1.3   root     1142:        counters->cycles += cycles;
                   1143:        counters->i_misses += i_misses;
                   1144:        counters->d_hits += d_hits;
1.1       root     1145: 
                   1146: #if DEBUG
                   1147:        if (unlikely(OpcodeFamily == 0)) {
                   1148:                Uint32 nextpc;
                   1149:                fputs("WARNING: instruction opcode family is zero (=i_ILLG) for instruction:\n", stderr);
                   1150:                Disasm(stderr, prev_pc, &nextpc, 1);
                   1151:        }
                   1152:        /* catch too large (and negative) cycles for other than STOP instruction */
                   1153:        if (unlikely(cycles > 512 && OpcodeFamily != i_STOP)) {
1.1.1.3   root     1154:                warn_too_large("cycles", cycles, 512, prev_pc, pc);
1.1       root     1155:        }
1.1.1.3   root     1156: # if !ENABLE_WINUAE_CPU
                   1157:        {
                   1158:                static Uint32 prev_cycles = 0, prev_pc2 = 0;
                   1159:                if (unlikely(cycles == 0 && prev_cycles == 0)) {
                   1160:                        Uint32 nextpc;
                   1161:                        fputs("WARNING: Zero cycles for successive opcodes:\n", stderr);
                   1162:                        Disasm(stderr, prev_pc2, &nextpc, 1);
                   1163:                        Disasm(stderr, prev_pc, &nextpc, 1);
                   1164:                }
                   1165:                prev_cycles = cycles;
                   1166:                prev_pc2 = prev_pc;
1.1       root     1167:        }
1.1.1.3   root     1168: # endif
1.1       root     1169: #endif
                   1170: }
                   1171: 
                   1172: 
                   1173: /**
                   1174:  * Helper for accounting CPU profile area item.
                   1175:  */
                   1176: static void update_area_item(profile_area_t *area, Uint32 addr, cpu_profile_item_t *item)
                   1177: {
                   1178:        Uint32 cycles = item->cycles;
                   1179:        Uint32 count = item->count;
                   1180: 
                   1181:        if (!count) {
                   1182:                return;
                   1183:        }
                   1184:        area->counters.count += count;
                   1185:        area->counters.cycles += cycles;
1.1.1.3   root     1186:        area->counters.i_misses += item->i_misses;
                   1187:        area->counters.d_hits += item->d_hits;
1.1       root     1188: 
                   1189:        if (cycles == MAX_CPU_PROFILE_VALUE) {
                   1190:                area->overflow = true;
                   1191:        }
                   1192:        if (addr < area->lowest) {
                   1193:                area->lowest = addr;
                   1194:        }
                   1195:        area->highest = addr;
                   1196: 
                   1197:        area->active++;
                   1198: }
                   1199: 
                   1200: /**
                   1201:  * Helper for collecting CPU profile area statistics.
                   1202:  */
                   1203: static Uint32 update_area(profile_area_t *area, Uint32 start, Uint32 end)
                   1204: {
                   1205:        cpu_profile_item_t *item;
                   1206:        Uint32 addr;
                   1207: 
                   1208:        memset(area, 0, sizeof(profile_area_t));
                   1209:        area->lowest = cpu_profile.size;
                   1210: 
                   1211:        item = &(cpu_profile.data[start]);
                   1212:        for (addr = start; addr < end; addr++, item++) {
                   1213:                update_area_item(area, addr, item);
                   1214:        }
                   1215:        return addr;
                   1216: }
                   1217: 
                   1218: /**
                   1219:  * Helper for initializing CPU profile area sorting indexes.
                   1220:  */
                   1221: static Uint32* index_area(profile_area_t *area, Uint32 *sort_arr)
                   1222: {
                   1223:        cpu_profile_item_t *item;
                   1224:        Uint32 addr;
                   1225: 
                   1226:        item = &(cpu_profile.data[area->lowest]);
                   1227:        for (addr = area->lowest; addr <= area->highest; addr++, item++) {
                   1228:                if (item->count) {
                   1229:                        *sort_arr++ = addr;
                   1230:                }
                   1231:        }
                   1232:        return sort_arr;
                   1233: }
                   1234: 
                   1235: /**
                   1236:  * Stop and process the CPU profiling data; collect stats and
                   1237:  * prepare for more optimal sorting.
                   1238:  */
                   1239: void Profile_CpuStop(void)
                   1240: {
                   1241:        Uint32 *sort_arr, next;
1.1.1.3   root     1242:        unsigned int size, stsize;
1.1       root     1243:        int active;
                   1244: 
                   1245:        if (cpu_profile.processed || !cpu_profile.enabled) {
                   1246:                return;
                   1247:        }
1.1.1.2   root     1248: 
                   1249:        log_last_loop();
                   1250:        if (profile_loop.fp) {
                   1251:                fflush(profile_loop.fp);
                   1252:        }
                   1253: 
1.1       root     1254:        /* user didn't change RAM or TOS size in the meanwhile? */
1.1.1.3   root     1255:        size = stsize = (STRamEnd + CART_SIZE + TosSize) / 2;
1.1.1.5 ! root     1256:        if (TTmemory && ConfigureParams.Memory.TTRamSize_KB) {
        !          1257:                size += ConfigureParams.Memory.TTRamSize_KB * 1024/2;
1.1.1.3   root     1258:        }
                   1259:        assert(cpu_profile.size == size);
1.1       root     1260: 
                   1261:        Profile_FinalizeCalls(&(cpu_callinfo), &(cpu_profile.all), Symbols_GetByCpuAddress);
                   1262: 
                   1263:        /* find lowest and highest addresses executed etc */
                   1264:        next = update_area(&cpu_profile.ram, 0, STRamEnd/2);
1.1.1.3   root     1265:        if (TosAddress < CART_START) {
                   1266:                next = update_area(&cpu_profile.tos, next, (STRamEnd + TosSize)/2);
                   1267:                next = update_area(&cpu_profile.rom, next, stsize);
                   1268:        } else {
                   1269:                next = update_area(&cpu_profile.rom, next, (STRamEnd + CART_SIZE)/2);
                   1270:                next = update_area(&cpu_profile.tos, next, stsize);
                   1271:        }
                   1272:        next = update_area(&cpu_profile.ttram, next, size);
                   1273:        assert(next == size);
1.1       root     1274: 
                   1275: #if DEBUG
                   1276:        if (skip_assert) {
                   1277:                skip_assert = false;
                   1278:        } else
                   1279: #endif
                   1280:        {
1.1.1.3   root     1281: #if DEBUG
                   1282:                if (cpu_profile.all.count != cpu_profile.ttram.counters.count + cpu_profile.ram.counters.count + cpu_profile.tos.counters.count + cpu_profile.rom.counters.count) {
                   1283:                        fprintf(stderr, "ERROR, instruction count mismatch:\n\t%"PRIu64" != %"PRIu64" + %"PRIu64" + %"PRIu64" + %"PRIu64"?\n",
                   1284:                                cpu_profile.all.count, cpu_profile.ttram.counters.count, cpu_profile.ram.counters.count,
                   1285:                                cpu_profile.tos.counters.count, cpu_profile.rom.counters.count);
                   1286:                        fprintf(stderr, "If there was debugger invocation from profiling before this, try with profiler DEBUG define disabled!!!\n");
                   1287:                }
                   1288: #endif
                   1289:                assert(cpu_profile.all.count == cpu_profile.ttram.counters.count + cpu_profile.ram.counters.count + cpu_profile.tos.counters.count + cpu_profile.rom.counters.count);
                   1290:                assert(cpu_profile.all.cycles == cpu_profile.ttram.counters.cycles + cpu_profile.ram.counters.cycles + cpu_profile.tos.counters.cycles + cpu_profile.rom.counters.cycles);
                   1291:                assert(cpu_profile.all.i_misses == cpu_profile.ttram.counters.i_misses + cpu_profile.ram.counters.i_misses + cpu_profile.tos.counters.i_misses + cpu_profile.rom.counters.i_misses);
                   1292:                assert(cpu_profile.all.d_hits == cpu_profile.ttram.counters.d_hits + cpu_profile.ram.counters.d_hits + cpu_profile.tos.counters.d_hits + cpu_profile.rom.counters.d_hits);
1.1       root     1293:        }
                   1294: 
                   1295:        /* allocate address array for sorting */
1.1.1.3   root     1296:        active = cpu_profile.ttram.active + cpu_profile.ram.active + cpu_profile.rom.active + cpu_profile.tos.active;
1.1       root     1297:        sort_arr = calloc(active, sizeof(*sort_arr));
                   1298: 
                   1299:        if (!sort_arr) {
                   1300:                perror("ERROR: allocating CPU profile address data");
                   1301:                free(cpu_profile.data);
                   1302:                cpu_profile.data = NULL;
                   1303:                return;
                   1304:        }
                   1305:        printf("Allocated CPU profile address buffer (%d KB).\n",
                   1306:               (int)sizeof(*sort_arr)*(active+512)/1024);
                   1307:        cpu_profile.sort_arr = sort_arr;
                   1308:        cpu_profile.active = active;
                   1309: 
                   1310:        /* and fill addresses for used instructions... */
                   1311:        sort_arr = index_area(&cpu_profile.ram, sort_arr);
                   1312:        sort_arr = index_area(&cpu_profile.tos, sort_arr);
                   1313:        sort_arr = index_area(&cpu_profile.rom, sort_arr);
1.1.1.3   root     1314:        sort_arr = index_area(&cpu_profile.ttram, sort_arr);
1.1       root     1315:        assert(sort_arr == cpu_profile.sort_arr + cpu_profile.active);
                   1316:        //printf("%d/%d/%d\n", area->active, sort_arr-cpu_profile.sort_arr, active);
                   1317: 
                   1318:        Profile_CpuShowStats();
                   1319:        cpu_profile.processed = true;
                   1320: }
                   1321: 
                   1322: /**
                   1323:  * Get pointers to CPU profile enabling and disasm address variables
                   1324:  * for updating them (in parser).
                   1325:  */
                   1326: void Profile_CpuGetPointers(bool **enabled, Uint32 **disasm_addr)
                   1327: {
                   1328:        *disasm_addr = &cpu_profile.disasm_addr;
                   1329:        *enabled = &cpu_profile.enabled;
                   1330: }
                   1331: 
                   1332: /**
                   1333:  * Get callinfo & symbol search pointers for stack walking.
                   1334:  */
1.1.1.5 ! root     1335: void Profile_CpuGetCallinfo(callinfo_t **callinfo, const char* (**get_symbol)(Uint32, symtype_t))
1.1       root     1336: {
                   1337:        *callinfo = &(cpu_callinfo);
                   1338:        *get_symbol = Symbols_GetByCpuAddress;
                   1339: }

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.