Annotation of hatari/src/debug/profilecpu.c, revision 1.1.1.6

1.1       root        1: /*
                      2:  * Hatari - profilecpu.c
                      3:  * 
1.1.1.3   root        4:  * Copyright (C) 2010-2015 by Eero Tamminen
1.1       root        5:  *
                      6:  * This file is distributed under the GNU General Public License, version 2
                      7:  * or at your option any later version. Read the file gpl.txt for details.
                      8:  *
                      9:  * profilecpu.c - functions for profiling CPU and showing the results.
                     10:  */
                     11: const char Profilecpu_fileid[] = "Hatari profilecpu.c : " __DATE__ " " __TIME__;
                     12: 
                     13: #include <stdio.h>
                     14: #include <inttypes.h>
                     15: #include <assert.h>
                     16: #include "main.h"
                     17: #include "configuration.h"
                     18: #include "clocks_timings.h"
                     19: #include "debugInfo.h"
                     20: #include "dsp.h"
                     21: #include "m68000.h"
                     22: #include "68kDisass.h"
1.1.1.5   root       23: #include "symbols.h"
1.1       root       24: #include "profile.h"
                     25: #include "profile_priv.h"
1.1.1.5   root       26: #include "debug_priv.h"
1.1       root       27: #include "stMemory.h"
                     28: #include "tos.h"
1.1.1.2   root       29: #include "screen.h"
                     30: #include "video.h"
                     31: 
                     32: 
                     33: /* cartridge area */
                     34: #define CART_START     0xFA0000
                     35: #define CART_END       0xFC0000
                     36: #define CART_SIZE      (CART_END - CART_START)
                     37: 
1.1.1.3   root       38: #define TTRAM_START    0x01000000
1.1       root       39: 
                     40: /* if non-zero, output (more) warnings on suspicious:
                     41:  * - cycle/instruction counts
                     42:  * - PC switches
1.1.1.3   root       43:  * And drop to debugger on invalid current & previous PC addresses.
                     44:  *
                     45:  * NOTE: DebugUI() calls that DEBUG define enables, can cause
                     46:  * instruction count mismatch assertions because debugger invocation
                     47:  * resets the counters AND happens in middle of data collection.
                     48:  * It's best to quit after debugging the issue ('q' command).
1.1       root       49:  */
                     50: #define DEBUG 0
                     51: #if DEBUG
                     52: #include "debugui.h"
                     53: static bool skip_assert;
                     54: #endif
                     55: 
1.1.1.5   root       56: /* whether to track & show all cache stats for all instructions */
                     57: #define DEBUG_CACHE 0
                     58: 
                     59: 
1.1       root       60: static callinfo_t cpu_callinfo;
                     61: 
                     62: #define MAX_CPU_PROFILE_VALUE 0xFFFFFFFF
                     63: 
                     64: typedef struct {
1.1.1.5   root       65:        Uint32 count;   /* how many times this address instruction is executed */
1.1       root       66:        Uint32 cycles;  /* how many CPU cycles was taken at this address */
1.1.1.5   root       67: #if DEBUG_CACHE                  /* track also less relevant cache events */
                     68:        Uint32 i_hits;    /* how many CPU i-cache hits happened at this address */
                     69:        Uint32 d_misses;  /* how many CPU d-cache misses happened at this address */
                     70: #endif
                     71:        Uint32 i_misses;  /* how many CPU i-cache misses happened at this address */
                     72:        Uint32 d_hits;    /* how many CPU d-cache hits happened at this address */
1.1       root       73: } cpu_profile_item_t;
                     74: 
1.1.1.5   root       75: 
                     76: /* max count of hits/misses single instruction can trigger at once */
1.1.1.3   root       77: #define MAX_I_HITS   8
                     78: #define MAX_I_MISSES 8
                     79: #define MAX_D_HITS   32
                     80: #define MAX_D_MISSES 20
1.1       root       81: 
                     82: static struct {
                     83:        counters_t all;       /* total counts for all areas */
                     84:        cpu_profile_item_t *data; /* profile data items */
                     85:        Uint32 size;          /* number of allocated profile data items */
1.1.1.3   root       86:        profile_area_t ttram; /* TT-RAM stats */
1.1       root       87:        profile_area_t ram;   /* normal RAM stats */
                     88:        profile_area_t rom;   /* cartridge ROM stats */
                     89:        profile_area_t tos;   /* ROM TOS stats */
                     90:        int active;           /* number of active data items in all areas */
                     91:        Uint32 *sort_arr;     /* data indexes used for sorting */
1.1.1.2   root       92:        int prev_family;      /* previous instruction opcode family */
1.1.1.3   root       93:        Uint64 prev_cycles;   /* previous instruction cycles counter */
1.1       root       94:        Uint32 prev_pc;       /* previous instruction address */
1.1.1.2   root       95:        Uint32 loop_start;    /* address of last loop start */
                     96:        Uint32 loop_end;      /* address of last loop end */
                     97:        Uint32 loop_count;    /* how many times it was looped */
1.1       root       98:        Uint32 disasm_addr;   /* 'addresses' command start address */
1.1.1.3   root       99: #if ENABLE_WINUAE_CPU
1.1.1.5   root      100:        Uint32 i_prefetched;  /* instructions that don't incur prefetch hit/miss */
1.1.1.3   root      101:        Uint32 i_hit_counts[MAX_I_HITS];    /* I-cache hit counts */
                    102:        Uint32 d_hit_counts[MAX_D_HITS];    /* D-cache hit counts */
                    103:        Uint32 i_miss_counts[MAX_I_MISSES]; /* I-cache miss counts */
                    104:        Uint32 d_miss_counts[MAX_D_MISSES]; /* D-cache miss counts */
                    105: #endif
1.1       root      106:        bool processed;       /* true when data is already processed */
                    107:        bool enabled;         /* true when profiling enabled */
                    108: } cpu_profile;
                    109: 
                    110: /* special hack for EmuTOS */
                    111: static Uint32 etos_switcher;
                    112: 
                    113: 
                    114: /* ------------------ CPU profile address mapping ----------------- */
                    115: 
                    116: /**
                    117:  * convert Atari memory address to sorting array profile data index.
                    118:  */
                    119: static inline Uint32 address2index(Uint32 pc)
                    120: {
                    121:        if (unlikely(pc & 1)) {
                    122:                fprintf(stderr, "WARNING: odd CPU profile instruction address 0x%x!\n", pc);
                    123: #if DEBUG
                    124:                skip_assert = true;
                    125:                DebugUI(REASON_CPU_EXCEPTION);
                    126: #endif
                    127:        }
1.1.1.2   root      128:        if (pc < STRamEnd) {
                    129:                /* most likely case, use RAM address as-is */
                    130: 
                    131:        } else if (pc >= TosAddress && pc < TosAddress + TosSize) {
1.1       root      132:                /* TOS, put it after RAM data */
                    133:                pc = pc - TosAddress + STRamEnd;
1.1.1.2   root      134:                if (TosAddress >= CART_END) {
                    135:                        /* and after cartridge data as it's higher */
                    136:                        pc += CART_SIZE;
                    137:                }
                    138:        } else if (pc >= CART_START && pc < CART_END) {
                    139:                /* ROM, put it after RAM data */
                    140:                pc = pc - CART_START + STRamEnd;
                    141:                if (TosAddress < CART_START) {
                    142:                        /* and after TOS as it's higher */
                    143:                        pc += TosSize;
                    144:                }
1.1.1.3   root      145: #if ENABLE_WINUAE_CPU
1.1.1.5   root      146:        } else if (TTmemory && pc >= TTRAM_START && pc < TTRAM_START + 1024*(unsigned)ConfigureParams.Memory.TTRamSize_KB) {
1.1.1.3   root      147:                pc += STRamEnd + TosSize + CART_SIZE - TTRAM_START;
                    148: #endif
1.1       root      149:        } else {
1.1.1.2   root      150:                fprintf(stderr, "WARNING: 'invalid' CPU PC profile instruction address 0x%x!\n", pc);
                    151:                /* extra entry at end is reserved for invalid PC values */
1.1.1.3   root      152:                pc = STRamEnd + TosSize + CART_SIZE;
1.1       root      153: #if DEBUG
1.1.1.2   root      154:                skip_assert = true;
                    155:                DebugUI(REASON_CPU_EXCEPTION);
1.1       root      156: #endif
                    157:        }
                    158:        /* CPU instructions are at even addresses, save space by halving */
                    159:        return (pc >> 1);
                    160: }
                    161: 
                    162: /**
                    163:  * convert sorting array profile data index to Atari memory address.
                    164:  */
                    165: static Uint32 index2address(Uint32 idx)
                    166: {
                    167:        idx <<= 1;
                    168:        /* RAM */
                    169:        if (idx < STRamEnd) {
                    170:                return idx;
                    171:        }
                    172:        idx -= STRamEnd;
1.1.1.2   root      173:        /* TOS before cartridge area? */
                    174:        if (TosAddress < CART_START) {
                    175:                /* TOS */
                    176:                if (idx < TosSize) {
                    177:                        return idx + TosAddress;
                    178:                }
                    179:                idx -= TosSize;
                    180:                /* ROM */
1.1.1.3   root      181:                if (idx < CART_SIZE) {
                    182:                        return idx + CART_START;
                    183:                }
                    184:                idx -= CART_SIZE;
1.1.1.2   root      185:        } else {
                    186:                /* ROM */
                    187:                if (idx < CART_SIZE) {
                    188:                        return idx + CART_START;
                    189:                }
                    190:                idx -= CART_SIZE;
                    191:                /* TOS */
1.1.1.3   root      192:                if (idx < TosSize) {
                    193:                        return idx + TosAddress;
                    194:                }
                    195:                idx -= TosSize;
1.1       root      196:        }
1.1.1.3   root      197:        return idx + TTRAM_START;
1.1       root      198: }
                    199: 
                    200: /* ------------------ CPU profile results ----------------- */
                    201: 
                    202: /**
1.1.1.5   root      203:  * Write string containing CPU cache stats, cycles, count, count percentage
                    204:  * for given address to provided buffer.
                    205:  *
1.1       root      206:  * Return true if data was available and non-zero, false otherwise.
                    207:  */
1.1.1.5   root      208: bool Profile_CpuAddressDataStr(char *buffer, size_t maxlen, Uint32 addr)
1.1       root      209: {
1.1.1.5   root      210:        cpu_profile_item_t *item;
                    211:        float percentage;
1.1       root      212:        Uint32 idx;
1.1.1.5   root      213: 
                    214:        assert(buffer && maxlen);
1.1       root      215:        if (!cpu_profile.data) {
                    216:                return false;
                    217:        }
                    218:        idx = address2index(addr);
1.1.1.5   root      219:        item = &(cpu_profile.data[idx]);
                    220: 
1.1       root      221:        if (cpu_profile.all.count) {
1.1.1.5   root      222:                percentage = 100.0 * item->count / cpu_profile.all.count;
1.1       root      223:        } else {
1.1.1.5   root      224:                percentage = 0.0;
1.1       root      225:        }
1.1.1.5   root      226: #if DEBUG_CACHE
                    227:        snprintf(buffer, maxlen, "%5.2f%% (%u, %u, %u, %u, %u, %u)",
                    228:                 percentage, item->count, item->cycles,
                    229:                 item->i_hits, item->i_misses,
                    230:                 item->d_hits, item->d_misses);
                    231: #else
                    232:        snprintf(buffer, maxlen, "%5.2f%% (%u, %u, %u, %u)",
                    233:                 percentage, item->count, item->cycles,
                    234:                 item->i_misses, item->d_hits);
                    235: #endif
                    236:        return (item->count > 0);
1.1       root      237: }
                    238: 
                    239: /**
                    240:  * Helper to show statistics for specified CPU profile area.
                    241:  */
                    242: static void show_cpu_area_stats(profile_area_t *area)
                    243: {
                    244:        if (!area->active) {
                    245:                fprintf(stderr, "- no activity\n");
                    246:                return;
                    247:        }
                    248:        fprintf(stderr, "- active address range:\n  0x%06x-0x%06x\n",
                    249:                index2address(area->lowest),
                    250:                index2address(area->highest));
1.1.1.5   root      251:        fprintf(stderr, "- active instruction addresses:\n  %d (%.2f%% of all areas)\n",
1.1       root      252:                area->active,
                    253:                100.0 * area->active / cpu_profile.active);
1.1.1.5   root      254:        fprintf(stderr, "- executed instructions:\n  %"PRIu64" (%.2f%% of all areas)\n",
1.1       root      255:                area->counters.count,
                    256:                100.0 * area->counters.count / cpu_profile.all.count);
1.1.1.3   root      257:        /* CPU cache in use? */
                    258:        if (cpu_profile.all.i_misses) {
1.1.1.5   root      259:                fprintf(stderr, "- instruction cache misses:\n  %"PRIu64" (%.2f%% of all areas)\n",
1.1.1.3   root      260:                        area->counters.i_misses,
                    261:                        100.0 * area->counters.i_misses / cpu_profile.all.i_misses);
                    262:        }
                    263:        if (cpu_profile.all.d_hits) {
1.1.1.5   root      264:                fprintf(stderr, "- data cache hits:\n  %"PRIu64" (%.2f%% of all areas)\n",
1.1.1.3   root      265:                        area->counters.d_hits,
                    266:                        100.0 * area->counters.d_hits / cpu_profile.all.d_hits);
1.1       root      267:        }
1.1.1.5   root      268:        fprintf(stderr, "- used cycles:\n  %"PRIu64" (%.2f%% of all areas)\n  = %.5fs\n",
1.1       root      269:                area->counters.cycles,
                    270:                100.0 * area->counters.cycles / cpu_profile.all.cycles,
1.1.1.5   root      271:                (double)area->counters.cycles / MachineClocks.CPU_Freq_Emul);
1.1       root      272:        if (area->overflow) {
                    273:                fprintf(stderr, "  *** COUNTER OVERFLOW! ***\n");
                    274:        }
                    275: }
                    276: 
                    277: 
                    278: /**
                    279:  * show CPU area (RAM, ROM, TOS) specific statistics.
                    280:  */
                    281: void Profile_CpuShowStats(void)
                    282: {
                    283:        fprintf(stderr, "Normal RAM (0-0x%X):\n", STRamEnd);
                    284:        show_cpu_area_stats(&cpu_profile.ram);
                    285: 
                    286:        fprintf(stderr, "ROM TOS (0x%X-0x%X):\n", TosAddress, TosAddress + TosSize);
                    287:        show_cpu_area_stats(&cpu_profile.tos);
                    288: 
1.1.1.2   root      289:        fprintf(stderr, "Cartridge ROM (0x%X-%X):\n", CART_START, CART_END);
1.1       root      290:        show_cpu_area_stats(&cpu_profile.rom);
                    291: 
1.1.1.5   root      292:        if (TTmemory && ConfigureParams.Memory.TTRamSize_KB) {
                    293:                fprintf(stderr, "TT-RAM (0x%X-%X):\n", TTRAM_START, TTRAM_START + 1024*ConfigureParams.Memory.TTRamSize_KB);
1.1.1.3   root      294:                show_cpu_area_stats(&cpu_profile.ttram);
                    295:        }
                    296: 
1.1       root      297:        fprintf(stderr, "\n= %.5fs\n",
1.1.1.5   root      298:                (double)cpu_profile.all.cycles / MachineClocks.CPU_Freq_Emul);
1.1.1.3   root      299: }
1.1       root      300: 
                    301: #if ENABLE_WINUAE_CPU
1.1.1.3   root      302: /**
                    303:  * show percentage histogram of given array items
                    304:  */
                    305: static void show_histogram(const char *title, int count, Uint32 *items)
                    306: {
1.1.1.5   root      307:        const Uint64 maxval = cpu_profile.all.count;
1.1.1.3   root      308:        Uint32 value;
                    309:        int i;
                    310: 
1.1.1.5   root      311:        fprintf(stderr, "\n%s, number of occurrences:\n", title);
1.1.1.3   root      312:        for (i = 0; i < count; i++) {
                    313:                value = items[i];
                    314:                if (value) {
                    315:                        int w, width = 50 * value / maxval+1;
                    316:                        fprintf(stderr, " %2d: ", i);
                    317:                        for (w = 0; w < width; w++) {
                    318:                                fputc('#', stderr);
                    319:                        }
                    320:                        fprintf(stderr, " %.3f%%\n", 100.0 * value / maxval);
1.1       root      321:                }
                    322:        }
                    323: }
                    324: 
                    325: /**
1.1.1.3   root      326:  * show CPU cache usage histograms
                    327:  */
                    328: void Profile_CpuShowCaches(void)
                    329: {
                    330:        if (!(cpu_profile.all.i_misses || cpu_profile.all.d_hits)) {
1.1.1.5   root      331:                fprintf(stderr, "No instruction/data cache information.\n");
1.1.1.3   root      332:                return;
                    333:        }
1.1.1.5   root      334:        fprintf(stderr,
                    335:                "\nNote:\n"
                    336:                "- these statistics include all profiled instructions, but\n"
                    337:                "- instruction cache events happen only on prefetch/branch\n"
                    338:                "- data cache events can happen only for instructions that do memory reads\n"
                    339:                "\nAlready prefetched instructions: %.3f%% (no hits/misses)\n",
                    340:                100.0 * cpu_profile.i_prefetched / cpu_profile.all.count);
                    341: 
1.1.1.3   root      342:        show_histogram("Instruction cache hits per instruction",
1.1.1.4   root      343:                       ARRAY_SIZE(cpu_profile.i_hit_counts), cpu_profile.i_hit_counts);
1.1.1.3   root      344:        show_histogram("Instruction cache misses per instruction",
1.1.1.4   root      345:                       ARRAY_SIZE(cpu_profile.i_miss_counts), cpu_profile.i_miss_counts);
1.1.1.3   root      346:        show_histogram("Data cache hits per instruction",
1.1.1.4   root      347:                       ARRAY_SIZE(cpu_profile.d_hit_counts), cpu_profile.d_hit_counts);
1.1.1.3   root      348:        show_histogram("Data cache misses per instruction",
1.1.1.4   root      349:                       ARRAY_SIZE(cpu_profile.d_miss_counts), cpu_profile.d_miss_counts);
1.1.1.3   root      350: }
                    351: #else
                    352: void Profile_CpuShowCaches(void) {
                    353:        fprintf(stderr, "Cache information is recorded only with WinUAE CPU.\n");
                    354: }
                    355: #endif
                    356: 
                    357: /**
1.1       root      358:  * Show CPU instructions which execution was profiled, in the address order,
                    359:  * starting from the given address.  Return next disassembly address.
                    360:  */
1.1.1.5   root      361: Uint32 Profile_CpuShowAddresses(Uint32 lower, Uint32 upper, FILE *out, paging_t use_paging)
1.1       root      362: {
                    363:        int oldcols[DISASM_COLUMNS], newcols[DISASM_COLUMNS];
1.1.1.5   root      364:        int show, shown, addrs, active;
1.1       root      365:        const char *symbol;
                    366:        cpu_profile_item_t *data;
                    367:        Uint32 idx, end, size;
                    368:        uaecptr nextpc, addr;
                    369: 
                    370:        data = cpu_profile.data;
                    371:        if (!data) {
                    372:                fprintf(stderr, "ERROR: no CPU profiling data available!\n");
                    373:                return 0;
                    374:        }
                    375: 
                    376:        size = cpu_profile.size;
                    377:        active = cpu_profile.active;
                    378:        if (upper) {
                    379:                end = address2index(upper);
                    380:                if (end > size) {
                    381:                        end = size;
                    382:                }
                    383:        } else {
                    384:                end = size;
1.1.1.6 ! root      385:        }
        !           386:        show = INT_MAX;
        !           387:        if (use_paging == PAGING_ENABLED) {
1.1.1.5   root      388:                show = DebugUI_GetPageLines(ConfigureParams.Debugger.nDisasmLines, 0);
1.1.1.6 ! root      389:                if (!show) {
        !           390:                        show = INT_MAX;
1.1       root      391:                }
                    392:        }
                    393: 
                    394:        /* get/change columns */
                    395:        Disasm_GetColumns(oldcols);
                    396:        Disasm_DisableColumn(DISASM_COLUMN_HEXDUMP, oldcols, newcols);
                    397:        Disasm_SetColumns(newcols);
                    398: 
1.1.1.3   root      399:        fputs("# disassembly with profile data: <instructions percentage>% (<sum of instructions>, <sum of cycles>, <sum of i-cache misses>, <sum of d-cache hits>)\n", out);
1.1.1.5   root      400:        shown = 2; /* first and last printf */
1.1       root      401: 
1.1.1.5   root      402:        addrs = nextpc = 0;
1.1       root      403:        idx = address2index(lower);
1.1.1.6 ! root      404:        for (; shown < show && addrs < active && idx < end; idx++) {
1.1       root      405:                if (!data[idx].count) {
                    406:                        continue;
                    407:                }
                    408:                addr = index2address(idx);
                    409:                if (addr != nextpc && nextpc) {
                    410:                        fprintf(out, "[...]\n");
1.1.1.5   root      411:                        shown++;
1.1       root      412:                }
1.1.1.5   root      413:                symbol = Symbols_GetByCpuAddress(addr, SYMTYPE_TEXT);
1.1       root      414:                if (symbol) {
                    415:                        fprintf(out, "%s:\n", symbol);
1.1.1.5   root      416:                        shown++;
1.1       root      417:                }
                    418:                /* NOTE: column setup works only with 68kDisass disasm engine! */
                    419:                Disasm(out, addr, &nextpc, 1);
                    420:                shown++;
1.1.1.5   root      421:                addrs++;
1.1       root      422:        }
1.1.1.6 ! root      423:        if (idx < end) {
        !           424:                printf("Disassembled %d (of active %d) CPU addresses.\n", addrs, active);
        !           425:        } else {
        !           426:                printf("Disassembled last %d (of active %d) CPU addresses, wrapping...\n", addrs, active);
        !           427:                nextpc = 0;
        !           428:        }
1.1       root      429:        /* restore disassembly columns */
                    430:        Disasm_SetColumns(oldcols);
                    431:        return nextpc;
                    432: }
                    433: 
                    434: /**
                    435:  * remove all disassembly columns except instruction ones.
                    436:  * data needed to restore columns is stored to "oldcols"
                    437:  */
                    438: static void leave_instruction_column(int *oldcols)
                    439: {
                    440:        int i, newcols[DISASM_COLUMNS];
                    441: 
                    442:        Disasm_GetColumns(oldcols);
                    443:        for (i = 0; i < DISASM_COLUMNS; i++) {
                    444:                if (i == DISASM_COLUMN_OPCODE || i == DISASM_COLUMN_OPERAND) {
                    445:                        continue;
                    446:                }
                    447:                Disasm_DisableColumn(i, oldcols, newcols);
                    448:                oldcols = newcols;
                    449:        }
                    450:        Disasm_SetColumns(newcols);
                    451: }
                    452: 
                    453: #if ENABLE_WINUAE_CPU
                    454: /**
                    455:  * compare function for qsort() to sort CPU profile data by instruction cache misses.
                    456:  */
1.1.1.3   root      457: static int cmp_cpu_i_misses(const void *p1, const void *p2)
1.1       root      458: {
1.1.1.3   root      459:        Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].i_misses;
                    460:        Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].i_misses;
1.1       root      461:        if (count1 > count2) {
                    462:                return -1;
                    463:        }
                    464:        if (count1 < count2) {
                    465:                return 1;
                    466:        }
                    467:        return 0;
                    468: }
                    469: 
                    470: /**
                    471:  * Sort CPU profile data addresses by instruction cache misses and show the results.
                    472:  */
1.1.1.3   root      473: void Profile_CpuShowInstrMisses(int show)
                    474: {
                    475:        int active;
                    476:        int oldcols[DISASM_COLUMNS];
                    477:        Uint32 *sort_arr, *end, addr, nextpc;
                    478:        cpu_profile_item_t *data = cpu_profile.data;
                    479:        float percentage;
                    480:        Uint32 count;
                    481: 
                    482:        if (!cpu_profile.all.i_misses) {
                    483:                fprintf(stderr, "No CPU instruction cache miss information available.\n");
                    484:                return;
                    485:        }
                    486: 
                    487:        active = cpu_profile.active;
                    488:        sort_arr = cpu_profile.sort_arr;
                    489:        qsort(sort_arr, active, sizeof(*sort_arr), cmp_cpu_i_misses);
                    490: 
                    491:        leave_instruction_column(oldcols);
                    492: 
                    493:        printf("addr:\t\ti-cache misses:\n");
                    494:        show = (show < active ? show : active);
                    495:        for (end = sort_arr + show; sort_arr < end; sort_arr++) {
                    496:                addr = index2address(*sort_arr);
                    497:                count = data[*sort_arr].i_misses;
                    498:                percentage = 100.0*count/cpu_profile.all.i_misses;
                    499:                printf("0x%06x\t%5.2f%%\t%d%s\t", addr, percentage, count,
                    500:                       count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
                    501:                Disasm(stdout, addr, &nextpc, 1);
                    502:        }
                    503:        printf("%d CPU addresses listed.\n", show);
                    504: 
                    505:        Disasm_SetColumns(oldcols);
                    506: }
                    507: 
                    508: /**
                    509:  * compare function for qsort() to sort CPU profile data by data cache hits.
                    510:  */
                    511: static int cmp_cpu_d_hits(const void *p1, const void *p2)
                    512: {
                    513:        Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].d_hits;
                    514:        Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].d_hits;
                    515:        if (count1 > count2) {
                    516:                return -1;
                    517:        }
                    518:        if (count1 < count2) {
                    519:                return 1;
                    520:        }
                    521:        return 0;
                    522: }
                    523: 
                    524: /**
                    525:  * Sort CPU profile data addresses by data cache hits and show the results.
                    526:  */
                    527: void Profile_CpuShowDataHits(int show)
1.1       root      528: {
                    529:        int active;
                    530:        int oldcols[DISASM_COLUMNS];
                    531:        Uint32 *sort_arr, *end, addr, nextpc;
                    532:        cpu_profile_item_t *data = cpu_profile.data;
                    533:        float percentage;
                    534:        Uint32 count;
                    535: 
1.1.1.3   root      536:        if (!cpu_profile.all.d_hits) {
                    537:                fprintf(stderr, "No CPU data cache hit information available.\n");
1.1       root      538:                return;
                    539:        }
                    540: 
                    541:        active = cpu_profile.active;
                    542:        sort_arr = cpu_profile.sort_arr;
1.1.1.3   root      543:        qsort(sort_arr, active, sizeof(*sort_arr), cmp_cpu_d_hits);
1.1       root      544: 
                    545:        leave_instruction_column(oldcols);
                    546: 
1.1.1.3   root      547:        printf("addr:\t\td-cache hits:\n");
1.1       root      548:        show = (show < active ? show : active);
                    549:        for (end = sort_arr + show; sort_arr < end; sort_arr++) {
                    550:                addr = index2address(*sort_arr);
1.1.1.3   root      551:                count = data[*sort_arr].d_hits;
                    552:                percentage = 100.0*count/cpu_profile.all.d_hits;
1.1       root      553:                printf("0x%06x\t%5.2f%%\t%d%s\t", addr, percentage, count,
                    554:                       count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
                    555:                Disasm(stdout, addr, &nextpc, 1);
                    556:        }
                    557:        printf("%d CPU addresses listed.\n", show);
                    558: 
                    559:        Disasm_SetColumns(oldcols);
                    560: }
1.1.1.3   root      561: 
1.1       root      562: #else
/**
 * Stub for builds without WinUAE CPU core: instruction cache miss
 * information isn't recorded, so just tell that to the user.
 */
void Profile_CpuShowInstrMisses(int show)
{
	(void)show;	/* nothing to list */
	fputs("Cache information is recorded only with WinUAE CPU.\n", stderr);
}
                    566: void Profile_CpuShowDataHits(int show) {
                    567:        fprintf(stderr, "Cache information is recorded only with WinUAE CPU.\n");
1.1       root      568: }
                    569: #endif
                    570: 
                    571: 
                    572: /**
                    573:  * compare function for qsort() to sort CPU profile data by cycles counts.
                    574:  */
                    575: static int cmp_cpu_cycles(const void *p1, const void *p2)
                    576: {
                    577:        Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].cycles;
                    578:        Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].cycles;
                    579:        if (count1 > count2) {
                    580:                return -1;
                    581:        }
                    582:        if (count1 < count2) {
                    583:                return 1;
                    584:        }
                    585:        return 0;
                    586: }
                    587: 
                    588: /**
                    589:  * Sort CPU profile data addresses by cycle counts and show the results.
                    590:  */
                    591: void Profile_CpuShowCycles(int show)
                    592: {
                    593:        int active;
                    594:        int oldcols[DISASM_COLUMNS];
                    595:        Uint32 *sort_arr, *end, addr, nextpc;
                    596:        cpu_profile_item_t *data = cpu_profile.data;
                    597:        float percentage;
                    598:        Uint32 count;
                    599: 
                    600:        if (!data) {
                    601:                fprintf(stderr, "ERROR: no CPU profiling data available!\n");
                    602:                return;
                    603:        }
                    604: 
                    605:        active = cpu_profile.active;
                    606:        sort_arr = cpu_profile.sort_arr;
                    607:        qsort(sort_arr, active, sizeof(*sort_arr), cmp_cpu_cycles);
                    608: 
                    609:        leave_instruction_column(oldcols);
                    610: 
                    611:        printf("addr:\t\tcycles:\n");
                    612:        show = (show < active ? show : active);
                    613:        for (end = sort_arr + show; sort_arr < end; sort_arr++) {
                    614:                addr = index2address(*sort_arr);
                    615:                count = data[*sort_arr].cycles;
                    616:                percentage = 100.0*count/cpu_profile.all.cycles;
                    617:                printf("0x%06x\t%5.2f%%\t%d%s\t", addr, percentage, count,
                    618:                       count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
                    619:                Disasm(stdout, addr, &nextpc, 1);
                    620:        }
                    621:        printf("%d CPU addresses listed.\n", show);
                    622: 
                    623:        Disasm_SetColumns(oldcols);
                    624: }
                    625: 
                    626: /**
                    627:  * compare function for qsort() to sort CPU profile data by descending
                    628:  * address access counts.
                    629:  */
                    630: static int cmp_cpu_count(const void *p1, const void *p2)
                    631: {
                    632:        Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].count;
                    633:        Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].count;
                    634:        if (count1 > count2) {
                    635:                return -1;
                    636:        }
                    637:        if (count1 < count2) {
                    638:                return 1;
                    639:        }
                    640:        return 0;
                    641: }
                    642: 
                    643: /**
                    644:  * Sort CPU profile data addresses by call counts and show the results.
                    645:  * If symbols are requested and symbols are loaded, show (only) addresses
                    646:  * matching a symbol.
                    647:  */
                    648: void Profile_CpuShowCounts(int show, bool only_symbols)
                    649: {
                    650:        cpu_profile_item_t *data = cpu_profile.data;
                    651:        int symbols, matched, active;
                    652:        int oldcols[DISASM_COLUMNS];
                    653:        Uint32 *sort_arr, *end, addr, nextpc;
                    654:        const char *name;
                    655:        float percentage;
                    656:        Uint32 count;
                    657: 
                    658:        if (!data) {
                    659:                fprintf(stderr, "ERROR: no CPU profiling data available!\n");
                    660:                return;
                    661:        }
                    662:        active = cpu_profile.active;
                    663:        show = (show < active ? show : active);
                    664: 
                    665:        sort_arr = cpu_profile.sort_arr;
                    666:        qsort(sort_arr, active, sizeof(*sort_arr), cmp_cpu_count);
                    667: 
                    668:        if (!only_symbols) {
                    669:                leave_instruction_column(oldcols);
                    670:                printf("addr:\t\tcount:\n");
                    671:                for (end = sort_arr + show; sort_arr < end; sort_arr++) {
                    672:                        addr = index2address(*sort_arr);
                    673:                        count = data[*sort_arr].count;
                    674:                        percentage = 100.0*count/cpu_profile.all.count;
                    675:                        printf("0x%06x\t%5.2f%%\t%d%s\t",
                    676:                               addr, percentage, count,
                    677:                               count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
                    678:                        Disasm(stdout, addr, &nextpc, 1);
                    679:                }
                    680:                printf("%d CPU addresses listed.\n", show);
                    681:                Disasm_SetColumns(oldcols);
                    682:                return;
                    683:        }
                    684: 
1.1.1.5   root      685:        symbols = Symbols_CpuCodeCount();
1.1       root      686:        if (!symbols) {
                    687:                fprintf(stderr, "ERROR: no CPU symbols loaded!\n");
                    688:                return;
                    689:        }
                    690:        matched = 0;    
                    691: 
                    692:        leave_instruction_column(oldcols);
                    693: 
                    694:        printf("addr:\t\tcount:\t\tsymbol:\n");
                    695:        for (end = sort_arr + active; sort_arr < end; sort_arr++) {
                    696: 
                    697:                addr = index2address(*sort_arr);
1.1.1.5   root      698:                name = Symbols_GetByCpuAddress(addr, SYMTYPE_TEXT);
1.1       root      699:                if (!name) {
                    700:                        continue;
                    701:                }
                    702:                count = data[*sort_arr].count;
                    703:                percentage = 100.0*count/cpu_profile.all.count;
                    704:                printf("0x%06x\t%5.2f%%\t%d\t%s%s\t",
                    705:                       addr, percentage, count, name,
                    706:                       count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
                    707:                Disasm(stdout, addr, &nextpc, 1);
                    708: 
                    709:                matched++;
                    710:                if (matched >= show || matched >= symbols) {
                    711:                        break;
                    712:                }
                    713:        }
                    714:        printf("%d CPU symbols listed.\n", matched);
                    715: 
                    716:        Disasm_SetColumns(oldcols);
                    717: }
                    718: 
                    719: 
                    720: static const char * addr2name(Uint32 addr, Uint64 *total)
                    721: {
                    722:        Uint32 idx = address2index(addr);
                    723:        *total = cpu_profile.data[idx].count;
1.1.1.5   root      724:        return Symbols_GetByCpuAddress(addr, SYMTYPE_TEXT);
1.1       root      725: }
                    726: 
                    727: /**
                    728:  * Output CPU callers info to given file.
                    729:  */
                    730: void Profile_CpuShowCallers(FILE *fp)
                    731: {
                    732:        Profile_ShowCallers(fp, cpu_callinfo.sites, cpu_callinfo.site, addr2name);
                    733: }
                    734: 
                    735: /**
                    736:  * Save CPU profile information to given file.
                    737:  */
                    738: void Profile_CpuSave(FILE *out)
                    739: {
1.1.1.3   root      740:        Uint32 text, end;
                    741:        fputs("Field names:\tExecuted instructions, Used cycles, Instruction cache misses, Data cache hits\n", out);
                    742:        /* (Python) regexp that matches address and all described fields from disassembly:
                    743:         * $<hex>  :  <ASM>  <percentage>% (<count>, <cycles>, <i-misses>, <d-hits>)
                    744:         * $e5af38 :   rts           0.00% (12, 0, 12, 0)
1.1       root      745:         */
                    746:        fputs("Field regexp:\t^\\$([0-9a-f]+) :.*% \\((.*)\\)$\n", out);
                    747:        /* some information for interpreting the addresses */
1.1.1.3   root      748:        fprintf(out, "ST_RAM:\t\t0x%06x-0x%06x\n", 0, STRamEnd);
                    749:        end = TosAddress + TosSize;
                    750:        fprintf(out, "ROM_TOS:\t0x%06x-0x%06x\n", TosAddress, end);
                    751:        fprintf(out, "CARTRIDGE:\t0x%06x-0x%06x\n", CART_START, CART_END);
1.1       root      752:        text = DebugInfo_GetTEXT();
1.1.1.3   root      753:        if (text && (text < TosAddress || text >= TTRAM_START)) {
1.1       root      754:                fprintf(out, "PROGRAM_TEXT:\t0x%06x-0x%06x\n", text, DebugInfo_GetTEXTEnd());
                    755:        }
1.1.1.5   root      756:        if (TTmemory && ConfigureParams.Memory.TTRamSize_KB) {
                    757:                end = TTRAM_START + 1024*ConfigureParams.Memory.TTRamSize_KB;
1.1.1.3   root      758:                fprintf(out, "TT_RAM:\t\t0x%08x-0x%08x\n", TTRAM_START, end);
                    759:        } else if (end < CART_END) {
                    760:                end = CART_END;
                    761:        }
1.1.1.5   root      762:        Profile_CpuShowAddresses(0, end-2, out, PAGING_DISABLED);
1.1       root      763:        Profile_CpuShowCallers(out);
                    764: }
                    765: 
                    766: /* ------------------ CPU profile control ----------------- */
                    767: 
                    768: /**
                    769:  * Initialize CPU profiling when necessary.  Return true if profiling.
                    770:  */
                    771: bool Profile_CpuStart(void)
                    772: {
                    773:        int size;
                    774: 
                    775:        Profile_FreeCallinfo(&(cpu_callinfo));
                    776:        if (cpu_profile.sort_arr) {
                    777:                /* remove previous results */
                    778:                free(cpu_profile.sort_arr);
                    779:                free(cpu_profile.data);
                    780:                cpu_profile.sort_arr = NULL;
                    781:                cpu_profile.data = NULL;
                    782:                printf("Freed previous CPU profile buffers.\n");
                    783:        }
                    784:        if (!cpu_profile.enabled) {
                    785:                return false;
                    786:        }
                    787:        /* zero everything */
                    788:        memset(&cpu_profile, 0, sizeof(cpu_profile));
                    789: 
                    790:        /* Shouldn't change within same debug session */
1.1.1.3   root      791:        size = (STRamEnd + CART_SIZE + TosSize) / 2;
1.1.1.5   root      792:        if (TTmemory && ConfigureParams.Memory.TTRamSize_KB) {
                    793:                size += ConfigureParams.Memory.TTRamSize_KB * 1024/2;
1.1.1.3   root      794:        }
1.1       root      795: 
                    796:        /* Add one entry for catching invalid PC values */
                    797:        cpu_profile.data = calloc(size + 1, sizeof(*cpu_profile.data));
                    798:        if (!cpu_profile.data) {
                    799:                perror("ERROR, new CPU profile buffer alloc failed");
                    800:                return false;
                    801:        }
                    802:        printf("Allocated CPU profile buffer (%d MB).\n",
                    803:               (int)sizeof(*cpu_profile.data)*size/(1024*1024));
                    804:        cpu_profile.size = size;
                    805: 
1.1.1.5   root      806:        Profile_AllocCallinfo(&(cpu_callinfo), Symbols_CpuCodeCount(), "CPU");
1.1       root      807: 
                    808:        /* special hack for EmuTOS */
                    809:        etos_switcher = PC_UNDEFINED;
                    810:        if (cpu_callinfo.sites && bIsEmuTOS &&
                    811:            (!Symbols_GetCpuAddress(SYMTYPE_TEXT, "_switchto", &etos_switcher) || etos_switcher < TosAddress)) {
                    812:                etos_switcher = PC_UNDEFINED;
                    813:        }
                    814: 
1.1.1.3   root      815:        /* reset cache stats (CPU emulation doesn't do that) */
                    816:        CpuInstruction.D_Cache_hit = 0;
                    817:        CpuInstruction.I_Cache_hit = 0;
                    818:        CpuInstruction.I_Cache_miss = 0;
                    819:        CpuInstruction.D_Cache_miss = 0;
1.1       root      820: 
1.1.1.3   root      821:        cpu_profile.prev_cycles = CyclesGlobalClockCounter;
                    822:        cpu_profile.prev_family = OpcodeFamily;
                    823:        cpu_profile.prev_pc = M68000_GetPC();
                    824:        if (ConfigureParams.System.bAddressSpace24) {
                    825:                cpu_profile.prev_pc &= 0xffffff;
                    826:        }
1.1.1.2   root      827:        cpu_profile.loop_start = PC_UNDEFINED;
                    828:        cpu_profile.loop_end = PC_UNDEFINED;
                    829:        cpu_profile.loop_count = 0;
                    830:        Profile_LoopReset();
                    831: 
1.1       root      832:        cpu_profile.disasm_addr = 0;
                    833:        cpu_profile.processed = false;
                    834:        cpu_profile.enabled = true;
                    835:        return cpu_profile.enabled;
                    836: }
                    837: 
                    838: /**
                    839:  * return true if pc could be next instruction for previous pc
                    840:  */
                    841: static bool is_prev_instr(Uint32 prev_pc, Uint32 pc)
                    842: {
                    843:        /* just moved to next instruction (1-2 words)? */
                    844:        if (prev_pc < pc && (pc - prev_pc) <= 10) {
                    845:                return true;
                    846:        }
                    847:        return false;
                    848: }
                    849: 
                    850: /**
                    851:  * return caller instruction type classification
                    852:  */
                    853: static calltype_t cpu_opcode_type(int family, Uint32 prev_pc, Uint32 pc)
                    854: {
                    855:        switch (family) {
                    856: 
                    857:        case i_JSR:
                    858:        case i_BSR:
                    859:                return CALL_SUBROUTINE;
                    860: 
                    861:        case i_RTS:
                    862:        case i_RTR:
                    863:        case i_RTD:
                    864:                return CALL_SUBRETURN;
                    865: 
                    866:        case i_JMP:     /* often used also for "inlined" function calls... */
                    867:        case i_Bcc:     /* both BRA & BCC */
                    868:        case i_FBcc:
                    869:        case i_DBcc:
                    870:        case i_FDBcc:
                    871:                return CALL_BRANCH;
                    872: 
                    873:        case i_TRAP:
                    874:        case i_TRAPV:
                    875:        case i_TRAPcc:
                    876:        case i_FTRAPcc:
                    877:        case i_STOP:
                    878:        case i_ILLG:
                    879:        case i_CHK:
                    880:        case i_CHK2:
                    881:        case i_BKPT:
                    882:                return CALL_EXCEPTION;
                    883: 
                    884:        case i_RTE:
                    885:                return CALL_EXCRETURN;
                    886:        }
                    887:        /* just moved to next instruction? */
                    888:        if (is_prev_instr(prev_pc, pc)) {
                    889:                return CALL_NEXT;
                    890:        }
                    891:        return CALL_UNKNOWN;
                    892: }
                    893: 
                    894: /**
                    895:  * If call tracking is enabled (there are symbols), collect
                    896:  * information about subroutine and other calls, and their costs.
                    897:  * 
                    898:  * Like with profile data, caller info checks need to be for previous
                    899:  * instruction, that's why "pc" argument for this function actually
                    900:  * needs to be previous PC.
                    901:  */
                    902: static void collect_calls(Uint32 pc, counters_t *counters)
                    903: {
                    904:        calltype_t flag;
                    905:        int idx, family;
                    906:        Uint32 prev_pc, caller_pc;
                    907: 
                    908:        family = cpu_profile.prev_family;
                    909:        cpu_profile.prev_family = OpcodeFamily;
                    910: 
                    911:        prev_pc = cpu_callinfo.prev_pc;
                    912:        cpu_callinfo.prev_pc = pc;
                    913:        caller_pc = PC_UNDEFINED;
                    914: 
                    915:        /* address is return address for last subroutine call? */
                    916:        if (unlikely(pc == cpu_callinfo.return_pc) && likely(cpu_callinfo.depth)) {
                    917: 
                    918:                flag = cpu_opcode_type(family, prev_pc, pc);
1.1.1.2   root      919:                /* previous address can be exception return (e.g. RTE) instead of RTS,
                    920:                 * if exception occurred right after returning from subroutine call.
1.1       root      921:                 */
                    922:                if (likely(flag == CALL_SUBRETURN || flag == CALL_EXCRETURN)) {
                    923:                        caller_pc = Profile_CallEnd(&cpu_callinfo, counters);
                    924:                } else {
                    925: #if DEBUG
                    926:                        /* although at return address, it didn't return yet,
                    927:                         * e.g. because there was a jsr or jump to return address
                    928:                         */
                    929:                        Uint32 nextpc;
1.1.1.2   root      930:                        fprintf(stderr, "WARNING: subroutine call returned 0x%x -> 0x%x, not through RTS!\n", prev_pc, pc);
1.1       root      931:                        Disasm(stderr, prev_pc, &nextpc, 1);
                    932: #endif
                    933:                }
1.1.1.2   root      934:                /* next address might be another symbol, so need to fall through */
1.1       root      935:        }
                    936: 
                    937:        /* address is one which we're tracking? */
1.1.1.5   root      938:        idx = Symbols_GetCpuCodeIndex(pc);
1.1       root      939:        if (unlikely(idx >= 0)) {
                    940: 
                    941:                flag = cpu_opcode_type(family, prev_pc, pc);
1.1.1.2   root      942:                if (flag == CALL_SUBROUTINE || flag == CALL_EXCEPTION) {
1.1       root      943:                        /* special HACK for for EmuTOS AES switcher which
                    944:                         * changes stack content to remove itself from call
                    945:                         * stack and uses RTS for subroutine *calls*, not
                    946:                         * for returning from them.
                    947:                         *
                    948:                         * It wouldn't be reliable to detect calls from it,
                    949:                         * so I'm making call *to* it show up as branch, to
                    950:                         * keep callstack depth correct.
                    951:                         */
                    952:                        if (unlikely(pc == etos_switcher)) {
                    953:                                flag = CALL_BRANCH;
                    954:                        } else if (unlikely(prev_pc == PC_UNDEFINED)) {
                    955:                                /* if first profiled instruction
                    956:                                 * is subroutine call, it doesn't have
                    957:                                 * valid prev_pc value stored
                    958:                                 */
                    959:                                cpu_callinfo.return_pc = PC_UNDEFINED;
1.1.1.3   root      960:                                fprintf(stderr, "WARNING: previous PC for tracked address 0x%d is undefined!\n", pc);
1.1       root      961: #if DEBUG
                    962:                                skip_assert = true;
                    963:                                DebugUI(REASON_CPU_EXCEPTION);
                    964: #endif
                    965:                        } else {
                    966:                                /* slow! */
                    967:                                cpu_callinfo.return_pc = Disasm_GetNextPC(prev_pc);
                    968:                        }
                    969:                } else if (caller_pc != PC_UNDEFINED) {
1.1.1.2   root      970:                        /* returned from function to first instruction of another symbol:
1.1       root      971:                         *      0xf384  jsr some_function
                    972:                         *      other_symbol:
                    973:                         *      0f3x8a  some_instruction
                    974:                         * -> change return instruction address to
                    975:                         *    address of what did the returned call.
                    976:                         */
                    977:                        prev_pc = caller_pc;
                    978:                        assert(is_prev_instr(prev_pc, pc));
                    979:                        flag = CALL_NEXT;
                    980:                }
                    981:                Profile_CallStart(idx, &cpu_callinfo, prev_pc, flag, pc, counters);
                    982:        }
                    983: }
                    984: 
                    985: /**
1.1.1.2   root      986:  * log last loop info, if there's suitable data for one
                    987:  */
                    988: static void log_last_loop(void)
                    989: {
                    990:        unsigned len = cpu_profile.loop_end - cpu_profile.loop_start;
                    991:        if (cpu_profile.loop_count > 1 && (len < profile_loop.cpu_limit || !profile_loop.cpu_limit)) {
                    992:                fprintf(profile_loop.fp, "CPU %d 0x%06x %d %d\n", nVBLs,
                    993:                        cpu_profile.loop_start, len, cpu_profile.loop_count);
                    994:        }
                    995: }
                    996: 
1.1.1.3   root      997: # if DEBUG || ENABLE_WINUAE_CPU
                    998: /**
                    999:  * Warning for values going out of expected range
                   1000:  */
                   1001: static Uint32 warn_too_large(const char *name, const int value, const int limit, const Uint32 prev_pc, const Uint32 pc)
                   1002: {
                   1003:        Uint32 nextpc;
                   1004:        fprintf(stderr, "WARNING: unexpected (%d > %d) %s at 0x%x:\n", value, limit - 1, name, pc);
                   1005:        Disasm(stderr, prev_pc, &nextpc, 1);
                   1006:        Disasm(stderr, pc, &nextpc, 1);
                   1007: #if DEBUG
                   1008:        skip_assert = true;
                   1009:        DebugUI(REASON_CPU_EXCEPTION);
                   1010: #endif
                   1011:        return limit - 1;
                   1012: }
                   1013: #endif
                   1014: 
1.1.1.2   root     1015: /**
1.1       root     1016:  * Update CPU cycle and count statistics for PC address.
                   1017:  *
                   1018:  * This gets called after instruction has executed and PC
                   1019:  * has advanced to next instruction.
                   1020:  */
                   1021: void Profile_CpuUpdate(void)
                   1022: {
                   1023:        counters_t *counters = &(cpu_profile.all);
1.1.1.3   root     1024:        Uint32 pc, prev_pc, idx, cycles;
1.1       root     1025:        cpu_profile_item_t *prev;
1.1.1.3   root     1026: #if ENABLE_WINUAE_CPU
                   1027:        Uint32 i_hits, d_hits, i_misses, d_misses;
                   1028: #else
                   1029:        const Uint32 i_misses = 0, d_hits = 0;
                   1030: #endif
1.1       root     1031: 
                   1032:        prev_pc = cpu_profile.prev_pc;
1.1.1.3   root     1033:        /* PC may have extra bits when using 24 bit addressing, they need to be masked away as
1.1       root     1034:         * emulation itself does that too when PC value is used
                   1035:         */
1.1.1.3   root     1036:        cpu_profile.prev_pc = pc = M68000_GetPC();
                   1037:        if (ConfigureParams.System.bAddressSpace24) {
                   1038:                cpu_profile.prev_pc &= 0xffffff;
                   1039:        }
1.1.1.2   root     1040:        if (unlikely(profile_loop.fp)) {
                   1041:                if (pc < prev_pc) {
                   1042:                        if (pc == cpu_profile.loop_start && prev_pc == cpu_profile.loop_end) {
                   1043:                                cpu_profile.loop_count++;
                   1044:                        } else {
                   1045:                                cpu_profile.loop_start = pc;
                   1046:                                cpu_profile.loop_end = prev_pc;
                   1047:                                cpu_profile.loop_count = 1;
                   1048:                        }
                   1049:                } else {
                   1050:                        if (pc > cpu_profile.loop_end) {
                   1051:                                log_last_loop();
1.1.1.3   root     1052:                                cpu_profile.loop_end = 0xffffffff;
1.1.1.2   root     1053:                                cpu_profile.loop_count = 0;
                   1054:                        }
                   1055:                }
                   1056:        }
                   1057: 
1.1       root     1058:        idx = address2index(prev_pc);
                   1059:        assert(idx <= cpu_profile.size);
                   1060:        prev = cpu_profile.data + idx;
                   1061: 
                   1062:        if (likely(prev->count < MAX_CPU_PROFILE_VALUE)) {
                   1063:                prev->count++;
                   1064:        }
                   1065: 
1.1.1.3   root     1066:        cycles = CyclesGlobalClockCounter - cpu_profile.prev_cycles;
                   1067:        cpu_profile.prev_cycles = CyclesGlobalClockCounter;
1.1       root     1068: 
                   1069:        if (likely(prev->cycles < MAX_CPU_PROFILE_VALUE - cycles)) {
                   1070:                prev->cycles += cycles;
                   1071:        } else {
                   1072:                prev->cycles = MAX_CPU_PROFILE_VALUE;
                   1073:        }
                   1074: 
                   1075: #if ENABLE_WINUAE_CPU
1.1.1.3   root     1076:        /* only WinUAE CPU core provides cache information */
                   1077:        i_hits = CpuInstruction.I_Cache_hit;
                   1078:        d_hits = CpuInstruction.D_Cache_hit;
                   1079:        i_misses = CpuInstruction.I_Cache_miss;
                   1080:        d_misses = CpuInstruction.D_Cache_miss;
                   1081: 
                   1082:        /* reset cache stats after reading them (for the next instruction) */
                   1083:        CpuInstruction.I_Cache_hit = 0;
                   1084:        CpuInstruction.D_Cache_hit = 0;
                   1085:        CpuInstruction.I_Cache_miss = 0;
                   1086:        CpuInstruction.D_Cache_miss = 0;
                   1087: 
                   1088:        /* tracked for every address */
1.1.1.5   root     1089: # if DEBUG_CACHE
                   1090:        if (likely(prev->i_hits < MAX_CPU_PROFILE_VALUE - i_hits)) {
                   1091:                prev->i_hits += i_hits;
                   1092:        } else {
                   1093:                prev->i_hits = MAX_CPU_PROFILE_VALUE;
                   1094:        }
                   1095:        if (likely(prev->d_misses < MAX_CPU_PROFILE_VALUE - d_misses)) {
                   1096:                prev->d_misses += d_misses;
                   1097:        } else {
                   1098:                prev->d_misses = MAX_CPU_PROFILE_VALUE;
                   1099:        }
                   1100: # endif
1.1.1.3   root     1101:        if (likely(prev->i_misses < MAX_CPU_PROFILE_VALUE - i_misses)) {
                   1102:                prev->i_misses += i_misses;
1.1       root     1103:        } else {
1.1.1.3   root     1104:                prev->i_misses = MAX_CPU_PROFILE_VALUE;
1.1       root     1105:        }
1.1.1.3   root     1106:        if (likely(prev->d_hits < MAX_CPU_PROFILE_VALUE - d_hits)) {
                   1107:                prev->d_hits += d_hits;
                   1108:        } else {
                   1109:                prev->d_hits = MAX_CPU_PROFILE_VALUE;
                   1110:        }
                   1111: 
                   1112:        /* tracking for histogram, check for array overflows */
1.1.1.5   root     1113:        if (!(i_hits || i_misses)) {
                   1114:                cpu_profile.i_prefetched++;
                   1115:        }
1.1.1.3   root     1116:        if (unlikely(i_hits >= MAX_I_HITS)) {
                   1117:                i_hits = warn_too_large("number of CPU instruction cache hits", i_hits, MAX_I_HITS, prev_pc, pc);
                   1118:        }
                   1119:        cpu_profile.i_hit_counts[i_hits]++;
                   1120: 
                   1121:        if (unlikely(i_misses >= MAX_I_MISSES)) {
                   1122:                i_misses = warn_too_large("number of CPU instruction cache misses", i_misses, MAX_I_MISSES, prev_pc, pc);
                   1123:        }
                   1124:        cpu_profile.i_miss_counts[i_misses]++;
                   1125: 
                   1126:        if (unlikely(d_hits >= MAX_D_HITS)) {
                   1127:                d_hits = warn_too_large("number of CPU data cache hits", d_hits, MAX_D_HITS, prev_pc, pc);
                   1128:        }
                   1129:        cpu_profile.d_hit_counts[d_hits]++;
                   1130: 
                   1131:        if (unlikely(d_misses >= MAX_D_MISSES)) {
                   1132:                d_misses = warn_too_large("number of CPU data cache misses", d_misses, MAX_D_MISSES, prev_pc, pc);
                   1133:        }
                   1134:        cpu_profile.d_miss_counts[d_misses]++;
1.1.1.5   root     1135: #endif   /* ENABLE_WINUAE_CPU */
1.1.1.3   root     1136: 
1.1       root     1137:        if (cpu_callinfo.sites) {
                   1138:                collect_calls(prev_pc, counters);
                   1139:        }
1.1.1.5   root     1140:        /* total counters are increased after caller info is processed,
1.1       root     1141:         * otherwise cost for the instruction calling the callee
                   1142:         * doesn't get accounted to caller (but callee).
                   1143:         */
                   1144:        counters->count++;
1.1.1.3   root     1145:        counters->cycles += cycles;
                   1146:        counters->i_misses += i_misses;
                   1147:        counters->d_hits += d_hits;
1.1       root     1148: 
                   1149: #if DEBUG
                   1150:        if (unlikely(OpcodeFamily == 0)) {
                   1151:                Uint32 nextpc;
                   1152:                fputs("WARNING: instruction opcode family is zero (=i_ILLG) for instruction:\n", stderr);
                   1153:                Disasm(stderr, prev_pc, &nextpc, 1);
                   1154:        }
                   1155:        /* catch too large (and negative) cycles for other than STOP instruction */
                   1156:        if (unlikely(cycles > 512 && OpcodeFamily != i_STOP)) {
1.1.1.3   root     1157:                warn_too_large("cycles", cycles, 512, prev_pc, pc);
1.1       root     1158:        }
1.1.1.3   root     1159: # if !ENABLE_WINUAE_CPU
                   1160:        {
                   1161:                static Uint32 prev_cycles = 0, prev_pc2 = 0;
                   1162:                if (unlikely(cycles == 0 && prev_cycles == 0)) {
                   1163:                        Uint32 nextpc;
                   1164:                        fputs("WARNING: Zero cycles for successive opcodes:\n", stderr);
                   1165:                        Disasm(stderr, prev_pc2, &nextpc, 1);
                   1166:                        Disasm(stderr, prev_pc, &nextpc, 1);
                   1167:                }
                   1168:                prev_cycles = cycles;
                   1169:                prev_pc2 = prev_pc;
1.1       root     1170:        }
1.1.1.3   root     1171: # endif
1.1       root     1172: #endif
                   1173: }
                   1174: 
                   1175: 
                   1176: /**
                   1177:  * Helper for accounting CPU profile area item.
                   1178:  */
                   1179: static void update_area_item(profile_area_t *area, Uint32 addr, cpu_profile_item_t *item)
                   1180: {
                   1181:        Uint32 cycles = item->cycles;
                   1182:        Uint32 count = item->count;
                   1183: 
                   1184:        if (!count) {
                   1185:                return;
                   1186:        }
                   1187:        area->counters.count += count;
                   1188:        area->counters.cycles += cycles;
1.1.1.3   root     1189:        area->counters.i_misses += item->i_misses;
                   1190:        area->counters.d_hits += item->d_hits;
1.1       root     1191: 
                   1192:        if (cycles == MAX_CPU_PROFILE_VALUE) {
                   1193:                area->overflow = true;
                   1194:        }
                   1195:        if (addr < area->lowest) {
                   1196:                area->lowest = addr;
                   1197:        }
                   1198:        area->highest = addr;
                   1199: 
                   1200:        area->active++;
                   1201: }
                   1202: 
                   1203: /**
                   1204:  * Helper for collecting CPU profile area statistics.
                   1205:  */
                   1206: static Uint32 update_area(profile_area_t *area, Uint32 start, Uint32 end)
                   1207: {
                   1208:        cpu_profile_item_t *item;
                   1209:        Uint32 addr;
                   1210: 
                   1211:        memset(area, 0, sizeof(profile_area_t));
                   1212:        area->lowest = cpu_profile.size;
                   1213: 
                   1214:        item = &(cpu_profile.data[start]);
                   1215:        for (addr = start; addr < end; addr++, item++) {
                   1216:                update_area_item(area, addr, item);
                   1217:        }
                   1218:        return addr;
                   1219: }
                   1220: 
                   1221: /**
                   1222:  * Helper for initializing CPU profile area sorting indexes.
                   1223:  */
                   1224: static Uint32* index_area(profile_area_t *area, Uint32 *sort_arr)
                   1225: {
                   1226:        cpu_profile_item_t *item;
                   1227:        Uint32 addr;
                   1228: 
                   1229:        item = &(cpu_profile.data[area->lowest]);
                   1230:        for (addr = area->lowest; addr <= area->highest; addr++, item++) {
                   1231:                if (item->count) {
                   1232:                        *sort_arr++ = addr;
                   1233:                }
                   1234:        }
                   1235:        return sort_arr;
                   1236: }
                   1237: 
                   1238: /**
                   1239:  * Stop and process the CPU profiling data; collect stats and
                   1240:  * prepare for more optimal sorting.
                   1241:  */
                   1242: void Profile_CpuStop(void)
                   1243: {
                   1244:        Uint32 *sort_arr, next;
1.1.1.3   root     1245:        unsigned int size, stsize;
1.1       root     1246:        int active;
                   1247: 
                   1248:        if (cpu_profile.processed || !cpu_profile.enabled) {
                   1249:                return;
                   1250:        }
1.1.1.2   root     1251: 
                   1252:        log_last_loop();
                   1253:        if (profile_loop.fp) {
                   1254:                fflush(profile_loop.fp);
                   1255:        }
                   1256: 
1.1       root     1257:        /* user didn't change RAM or TOS size in the meanwhile? */
1.1.1.3   root     1258:        size = stsize = (STRamEnd + CART_SIZE + TosSize) / 2;
1.1.1.5   root     1259:        if (TTmemory && ConfigureParams.Memory.TTRamSize_KB) {
                   1260:                size += ConfigureParams.Memory.TTRamSize_KB * 1024/2;
1.1.1.3   root     1261:        }
                   1262:        assert(cpu_profile.size == size);
1.1       root     1263: 
                   1264:        Profile_FinalizeCalls(&(cpu_callinfo), &(cpu_profile.all), Symbols_GetByCpuAddress);
                   1265: 
                   1266:        /* find lowest and highest addresses executed etc */
                   1267:        next = update_area(&cpu_profile.ram, 0, STRamEnd/2);
1.1.1.3   root     1268:        if (TosAddress < CART_START) {
                   1269:                next = update_area(&cpu_profile.tos, next, (STRamEnd + TosSize)/2);
                   1270:                next = update_area(&cpu_profile.rom, next, stsize);
                   1271:        } else {
                   1272:                next = update_area(&cpu_profile.rom, next, (STRamEnd + CART_SIZE)/2);
                   1273:                next = update_area(&cpu_profile.tos, next, stsize);
                   1274:        }
                   1275:        next = update_area(&cpu_profile.ttram, next, size);
                   1276:        assert(next == size);
1.1       root     1277: 
                   1278: #if DEBUG
                   1279:        if (skip_assert) {
                   1280:                skip_assert = false;
                   1281:        } else
                   1282: #endif
                   1283:        {
1.1.1.3   root     1284: #if DEBUG
                   1285:                if (cpu_profile.all.count != cpu_profile.ttram.counters.count + cpu_profile.ram.counters.count + cpu_profile.tos.counters.count + cpu_profile.rom.counters.count) {
                   1286:                        fprintf(stderr, "ERROR, instruction count mismatch:\n\t%"PRIu64" != %"PRIu64" + %"PRIu64" + %"PRIu64" + %"PRIu64"?\n",
                   1287:                                cpu_profile.all.count, cpu_profile.ttram.counters.count, cpu_profile.ram.counters.count,
                   1288:                                cpu_profile.tos.counters.count, cpu_profile.rom.counters.count);
                   1289:                        fprintf(stderr, "If there was debugger invocation from profiling before this, try with profiler DEBUG define disabled!!!\n");
                   1290:                }
                   1291: #endif
                   1292:                assert(cpu_profile.all.count == cpu_profile.ttram.counters.count + cpu_profile.ram.counters.count + cpu_profile.tos.counters.count + cpu_profile.rom.counters.count);
                   1293:                assert(cpu_profile.all.cycles == cpu_profile.ttram.counters.cycles + cpu_profile.ram.counters.cycles + cpu_profile.tos.counters.cycles + cpu_profile.rom.counters.cycles);
                   1294:                assert(cpu_profile.all.i_misses == cpu_profile.ttram.counters.i_misses + cpu_profile.ram.counters.i_misses + cpu_profile.tos.counters.i_misses + cpu_profile.rom.counters.i_misses);
                   1295:                assert(cpu_profile.all.d_hits == cpu_profile.ttram.counters.d_hits + cpu_profile.ram.counters.d_hits + cpu_profile.tos.counters.d_hits + cpu_profile.rom.counters.d_hits);
1.1       root     1296:        }
                   1297: 
                   1298:        /* allocate address array for sorting */
1.1.1.3   root     1299:        active = cpu_profile.ttram.active + cpu_profile.ram.active + cpu_profile.rom.active + cpu_profile.tos.active;
1.1       root     1300:        sort_arr = calloc(active, sizeof(*sort_arr));
                   1301: 
                   1302:        if (!sort_arr) {
                   1303:                perror("ERROR: allocating CPU profile address data");
                   1304:                free(cpu_profile.data);
                   1305:                cpu_profile.data = NULL;
                   1306:                return;
                   1307:        }
                   1308:        printf("Allocated CPU profile address buffer (%d KB).\n",
                   1309:               (int)sizeof(*sort_arr)*(active+512)/1024);
                   1310:        cpu_profile.sort_arr = sort_arr;
                   1311:        cpu_profile.active = active;
                   1312: 
                   1313:        /* and fill addresses for used instructions... */
                   1314:        sort_arr = index_area(&cpu_profile.ram, sort_arr);
                   1315:        sort_arr = index_area(&cpu_profile.tos, sort_arr);
                   1316:        sort_arr = index_area(&cpu_profile.rom, sort_arr);
1.1.1.3   root     1317:        sort_arr = index_area(&cpu_profile.ttram, sort_arr);
1.1       root     1318:        assert(sort_arr == cpu_profile.sort_arr + cpu_profile.active);
                   1319:        //printf("%d/%d/%d\n", area->active, sort_arr-cpu_profile.sort_arr, active);
                   1320: 
                   1321:        Profile_CpuShowStats();
                   1322:        cpu_profile.processed = true;
                   1323: }
                   1324: 
                   1325: /**
                   1326:  * Get pointers to CPU profile enabling and disasm address variables
                   1327:  * for updating them (in parser).
                   1328:  */
                   1329: void Profile_CpuGetPointers(bool **enabled, Uint32 **disasm_addr)
                   1330: {
                   1331:        *disasm_addr = &cpu_profile.disasm_addr;
                   1332:        *enabled = &cpu_profile.enabled;
                   1333: }
                   1334: 
                   1335: /**
                   1336:  * Get callinfo & symbol search pointers for stack walking.
                   1337:  */
1.1.1.5   root     1338: void Profile_CpuGetCallinfo(callinfo_t **callinfo, const char* (**get_symbol)(Uint32, symtype_t))
1.1       root     1339: {
                   1340:        *callinfo = &(cpu_callinfo);
                   1341:        *get_symbol = Symbols_GetByCpuAddress;
                   1342: }

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.