Annotation of hatari/src/debug/profilecpu.c, revision 1.1.1.3

1.1       root        1: /*
                      2:  * Hatari - profilecpu.c
                      3:  * 
1.1.1.3 ! root        4:  * Copyright (C) 2010-2015 by Eero Tamminen
1.1       root        5:  *
                      6:  * This file is distributed under the GNU General Public License, version 2
                      7:  * or at your option any later version. Read the file gpl.txt for details.
                      8:  *
                      9:  * profilecpu.c - functions for profiling CPU and showing the results.
                     10:  */
                     11: const char Profilecpu_fileid[] = "Hatari profilecpu.c : " __DATE__ " " __TIME__;
                     12: 
                     13: #include <stdio.h>
                     14: #include <inttypes.h>
                     15: #include <assert.h>
                     16: #include "main.h"
                     17: #include "configuration.h"
                     18: #include "clocks_timings.h"
                     19: #include "debugInfo.h"
                     20: #include "dsp.h"
                     21: #include "m68000.h"
                     22: #include "68kDisass.h"
                     23: #include "profile.h"
                     24: #include "profile_priv.h"
                     25: #include "stMemory.h"
                     26: #include "symbols.h"
                     27: #include "tos.h"
1.1.1.2   root       28: #include "screen.h"
                     29: #include "video.h"
                     30: 
                     31: 
                     32: /* cartridge area */
                     33: #define CART_START     0xFA0000
                     34: #define CART_END       0xFC0000
                     35: #define CART_SIZE      (CART_END - CART_START)
                     36: 
1.1.1.3 ! root       37: #define TTRAM_START    0x01000000
1.1       root       38: 
                     39: /* if non-zero, output (more) warnings on suspicious:
                     40:  * - cycle/instruction counts
                     41:  * - PC switches
1.1.1.3 ! root       42:  * And drop to debugger on invalid current & previous PC addresses.
        !            43:  *
        !            44:  * NOTE: DebugUI() calls that DEBUG define enables, can cause
        !            45:  * instruction count mismatch assertions because debugger invocation
        !            46:  * resets the counters AND happens in middle of data collection.
        !            47:  * It's best to quit after debugging the issue ('q' command).
1.1       root       48:  */
                     49: #define DEBUG 0
                     50: #if DEBUG
                     51: #include "debugui.h"
                     52: static bool skip_assert;
                     53: #endif
                     54: 
                     55: static callinfo_t cpu_callinfo;
                     56: 
                     57: #define MAX_CPU_PROFILE_VALUE 0xFFFFFFFF
                     58: 
/* profile counters collected for a single instruction address */
typedef struct {
	Uint32 count;   /* how many times the instruction at this address was executed */
	Uint32 cycles;  /* how many CPU cycles were taken at this address */
	Uint32 i_misses;  /* how many CPU instruction cache misses happened at this address */
	Uint32 d_hits;  /* how many CPU data cache hits happened at this address */
} cpu_profile_item_t;
                     65: 
1.1.1.3 ! root       66: #define MAX_I_HITS   8
        !            67: #define MAX_I_MISSES 8
        !            68: #define MAX_D_HITS   32
        !            69: #define MAX_D_MISSES 20
1.1       root       70: 
/* CPU profiler state: totals, per-address items, per-area statistics
 * and book-keeping about the previously profiled instruction
 */
static struct {
	counters_t all;       /* total counts for all areas */
	cpu_profile_item_t *data; /* profile data items */
	Uint32 size;          /* number of allocated profile data items */
	profile_area_t ttram; /* TT-RAM stats */
	profile_area_t ram;   /* normal RAM stats */
	profile_area_t rom;   /* cartridge ROM stats */
	profile_area_t tos;   /* ROM TOS stats */
	int active;           /* number of active data items in all areas */
	Uint32 *sort_arr;     /* data indexes used for sorting */
	int prev_family;      /* previous instruction opcode family */
	Uint64 prev_cycles;   /* previous instruction cycles counter */
	Uint32 prev_pc;       /* previous instruction address */
	Uint32 loop_start;    /* address of last loop start */
	Uint32 loop_end;      /* address of last loop end */
	Uint32 loop_count;    /* how many times it was looped */
	Uint32 disasm_addr;   /* 'addresses' command start address */
#if ENABLE_WINUAE_CPU
	/* arrays shown as histograms by Profile_CpuShowCaches() */
	Uint32 i_hit_counts[MAX_I_HITS];    /* I-cache hit counts */
	Uint32 d_hit_counts[MAX_D_HITS];    /* D-cache hit counts */
	Uint32 i_miss_counts[MAX_I_MISSES]; /* I-cache miss counts */
	Uint32 d_miss_counts[MAX_D_MISSES]; /* D-cache miss counts */
#endif
	bool processed;       /* true when data is already processed */
	bool enabled;         /* true when profiling enabled */
} cpu_profile;
                     97: 
                     98: /* special hack for EmuTOS */
                     99: static Uint32 etos_switcher;
                    100: 
                    101: 
                    102: /* ------------------ CPU profile address mapping ----------------- */
                    103: 
                    104: /**
                    105:  * convert Atari memory address to sorting array profile data index.
                    106:  */
                    107: static inline Uint32 address2index(Uint32 pc)
                    108: {
                    109:        if (unlikely(pc & 1)) {
                    110:                fprintf(stderr, "WARNING: odd CPU profile instruction address 0x%x!\n", pc);
                    111: #if DEBUG
                    112:                skip_assert = true;
                    113:                DebugUI(REASON_CPU_EXCEPTION);
                    114: #endif
                    115:        }
1.1.1.2   root      116:        if (pc < STRamEnd) {
                    117:                /* most likely case, use RAM address as-is */
                    118: 
                    119:        } else if (pc >= TosAddress && pc < TosAddress + TosSize) {
1.1       root      120:                /* TOS, put it after RAM data */
                    121:                pc = pc - TosAddress + STRamEnd;
1.1.1.2   root      122:                if (TosAddress >= CART_END) {
                    123:                        /* and after cartridge data as it's higher */
                    124:                        pc += CART_SIZE;
                    125:                }
                    126:        } else if (pc >= CART_START && pc < CART_END) {
                    127:                /* ROM, put it after RAM data */
                    128:                pc = pc - CART_START + STRamEnd;
                    129:                if (TosAddress < CART_START) {
                    130:                        /* and after TOS as it's higher */
                    131:                        pc += TosSize;
                    132:                }
1.1.1.3 ! root      133: #if ENABLE_WINUAE_CPU
        !           134:        } else if (TTmemory && pc >= TTRAM_START && pc < TTRAM_START + 1024*1024*(unsigned)ConfigureParams.Memory.nTTRamSize) {
        !           135:                pc += STRamEnd + TosSize + CART_SIZE - TTRAM_START;
        !           136: #endif
1.1       root      137:        } else {
1.1.1.2   root      138:                fprintf(stderr, "WARNING: 'invalid' CPU PC profile instruction address 0x%x!\n", pc);
                    139:                /* extra entry at end is reserved for invalid PC values */
1.1.1.3 ! root      140:                pc = STRamEnd + TosSize + CART_SIZE;
1.1       root      141: #if DEBUG
1.1.1.2   root      142:                skip_assert = true;
                    143:                DebugUI(REASON_CPU_EXCEPTION);
1.1       root      144: #endif
                    145:        }
                    146:        /* CPU instructions are at even addresses, save space by halving */
                    147:        return (pc >> 1);
                    148: }
                    149: 
                    150: /**
                    151:  * convert sorting array profile data index to Atari memory address.
                    152:  */
                    153: static Uint32 index2address(Uint32 idx)
                    154: {
                    155:        idx <<= 1;
                    156:        /* RAM */
                    157:        if (idx < STRamEnd) {
                    158:                return idx;
                    159:        }
                    160:        idx -= STRamEnd;
1.1.1.2   root      161:        /* TOS before cartridge area? */
                    162:        if (TosAddress < CART_START) {
                    163:                /* TOS */
                    164:                if (idx < TosSize) {
                    165:                        return idx + TosAddress;
                    166:                }
                    167:                idx -= TosSize;
                    168:                /* ROM */
1.1.1.3 ! root      169:                if (idx < CART_SIZE) {
        !           170:                        return idx + CART_START;
        !           171:                }
        !           172:                idx -= CART_SIZE;
1.1.1.2   root      173:        } else {
                    174:                /* ROM */
                    175:                if (idx < CART_SIZE) {
                    176:                        return idx + CART_START;
                    177:                }
                    178:                idx -= CART_SIZE;
                    179:                /* TOS */
1.1.1.3 ! root      180:                if (idx < TosSize) {
        !           181:                        return idx + TosAddress;
        !           182:                }
        !           183:                idx -= TosSize;
1.1       root      184:        }
1.1.1.3 ! root      185:        return idx + TTRAM_START;
1.1       root      186: }
                    187: 
                    188: /* ------------------ CPU profile results ----------------- */
                    189: 
                    190: /**
                    191:  * Get CPU cycles, count and count percentage for given address.
                    192:  * Return true if data was available and non-zero, false otherwise.
                    193:  */
1.1.1.3 ! root      194: bool Profile_CpuAddressData(Uint32 addr, float *percentage, Uint32 *count, Uint32 *cycles, Uint32 *i_misses, Uint32 *d_hits)
1.1       root      195: {
                    196:        Uint32 idx;
                    197:        if (!cpu_profile.data) {
                    198:                return false;
                    199:        }
                    200:        idx = address2index(addr);
1.1.1.3 ! root      201:        *i_misses = cpu_profile.data[idx].i_misses;
        !           202:        *d_hits = cpu_profile.data[idx].d_hits;
1.1       root      203:        *cycles = cpu_profile.data[idx].cycles;
                    204:        *count = cpu_profile.data[idx].count;
                    205:        if (cpu_profile.all.count) {
                    206:                *percentage = 100.0*(*count)/cpu_profile.all.count;
                    207:        } else {
                    208:                *percentage = 0.0;
                    209:        }
                    210:        return (*count > 0);
                    211: }
                    212: 
                    213: /**
                    214:  * Helper to show statistics for specified CPU profile area.
                    215:  */
                    216: static void show_cpu_area_stats(profile_area_t *area)
                    217: {
                    218:        if (!area->active) {
                    219:                fprintf(stderr, "- no activity\n");
                    220:                return;
                    221:        }
                    222:        fprintf(stderr, "- active address range:\n  0x%06x-0x%06x\n",
                    223:                index2address(area->lowest),
                    224:                index2address(area->highest));
                    225:        fprintf(stderr, "- active instruction addresses:\n  %d (%.2f%% of all)\n",
                    226:                area->active,
                    227:                100.0 * area->active / cpu_profile.active);
                    228:        fprintf(stderr, "- executed instructions:\n  %"PRIu64" (%.2f%% of all)\n",
                    229:                area->counters.count,
                    230:                100.0 * area->counters.count / cpu_profile.all.count);
1.1.1.3 ! root      231:        /* CPU cache in use? */
        !           232:        if (cpu_profile.all.i_misses) {
1.1       root      233:                fprintf(stderr, "- instruction cache misses:\n  %"PRIu64" (%.2f%% of all)\n",
1.1.1.3 ! root      234:                        area->counters.i_misses,
        !           235:                        100.0 * area->counters.i_misses / cpu_profile.all.i_misses);
        !           236:        }
        !           237:        if (cpu_profile.all.d_hits) {
        !           238:                fprintf(stderr, "- data cache hits:\n  %"PRIu64" (%.2f%% of all)\n",
        !           239:                        area->counters.d_hits,
        !           240:                        100.0 * area->counters.d_hits / cpu_profile.all.d_hits);
1.1       root      241:        }
                    242:        fprintf(stderr, "- used cycles:\n  %"PRIu64" (%.2f%% of all)\n  = %.5fs\n",
                    243:                area->counters.cycles,
                    244:                100.0 * area->counters.cycles / cpu_profile.all.cycles,
                    245:                (double)area->counters.cycles / MachineClocks.CPU_Freq);
                    246:        if (area->overflow) {
                    247:                fprintf(stderr, "  *** COUNTER OVERFLOW! ***\n");
                    248:        }
                    249: }
                    250: 
                    251: 
                    252: /**
                    253:  * show CPU area (RAM, ROM, TOS) specific statistics.
                    254:  */
                    255: void Profile_CpuShowStats(void)
                    256: {
                    257:        fprintf(stderr, "Normal RAM (0-0x%X):\n", STRamEnd);
                    258:        show_cpu_area_stats(&cpu_profile.ram);
                    259: 
                    260:        fprintf(stderr, "ROM TOS (0x%X-0x%X):\n", TosAddress, TosAddress + TosSize);
                    261:        show_cpu_area_stats(&cpu_profile.tos);
                    262: 
1.1.1.2   root      263:        fprintf(stderr, "Cartridge ROM (0x%X-%X):\n", CART_START, CART_END);
1.1       root      264:        show_cpu_area_stats(&cpu_profile.rom);
                    265: 
1.1.1.3 ! root      266:        if (TTmemory && ConfigureParams.Memory.nTTRamSize) {
        !           267:                fprintf(stderr, "TT-RAM (0x%X-%X):\n", TTRAM_START, TTRAM_START + 1024*1024*ConfigureParams.Memory.nTTRamSize);
        !           268:                show_cpu_area_stats(&cpu_profile.ttram);
        !           269:        }
        !           270: 
1.1       root      271:        fprintf(stderr, "\n= %.5fs\n",
                    272:                (double)cpu_profile.all.cycles / MachineClocks.CPU_Freq);
1.1.1.3 ! root      273: }
1.1       root      274: 
                    275: #if ENABLE_WINUAE_CPU
1.1.1.3 ! root      276: /**
        !           277:  * show percentage histogram of given array items
        !           278:  */
        !           279: static void show_histogram(const char *title, int count, Uint32 *items)
        !           280: {
        !           281:        Uint64 maxval;
        !           282:        Uint32 value;
        !           283:        int i;
        !           284: 
        !           285:        fprintf(stderr, "\n%s, number of occurrencies:\n", title);
        !           286:        maxval = 0;
        !           287:        for (i = 0; i < count; i++) {
        !           288:                maxval += items[i];
        !           289:        }
        !           290:        for (i = 0; i < count; i++) {
        !           291:                value = items[i];
        !           292:                if (value) {
        !           293:                        int w, width = 50 * value / maxval+1;
        !           294:                        fprintf(stderr, " %2d: ", i);
        !           295:                        for (w = 0; w < width; w++) {
        !           296:                                fputc('#', stderr);
        !           297:                        }
        !           298:                        fprintf(stderr, " %.3f%%\n", 100.0 * value / maxval);
1.1       root      299:                }
                    300:        }
                    301: }
                    302: 
                    303: /**
1.1.1.3 ! root      304:  * show CPU cache usage histograms
        !           305:  */
        !           306: void Profile_CpuShowCaches(void)
        !           307: {
        !           308:        if (!(cpu_profile.all.i_misses || cpu_profile.all.d_hits)) {
        !           309:                fprintf(stderr, "No instruction/data cache information.");
        !           310:                return;
        !           311:        }
        !           312:        show_histogram("Instruction cache hits per instruction",
        !           313:                       ARRAYSIZE(cpu_profile.i_hit_counts), cpu_profile.i_hit_counts);
        !           314:        show_histogram("Instruction cache misses per instruction",
        !           315:                       ARRAYSIZE(cpu_profile.i_miss_counts), cpu_profile.i_miss_counts);
        !           316:        show_histogram("Data cache hits per instruction",
        !           317:                       ARRAYSIZE(cpu_profile.d_hit_counts), cpu_profile.d_hit_counts);
        !           318:        show_histogram("Data cache misses per instruction",
        !           319:                       ARRAYSIZE(cpu_profile.d_miss_counts), cpu_profile.d_miss_counts);
        !           320: }
        !           321: #else
        !           322: void Profile_CpuShowCaches(void) {
        !           323:        fprintf(stderr, "Cache information is recorded only with WinUAE CPU.\n");
        !           324: }
        !           325: #endif
        !           326: 
        !           327: /**
1.1       root      328:  * Show CPU instructions which execution was profiled, in the address order,
                    329:  * starting from the given address.  Return next disassembly address.
                    330:  */
                    331: Uint32 Profile_CpuShowAddresses(Uint32 lower, Uint32 upper, FILE *out)
                    332: {
                    333:        int oldcols[DISASM_COLUMNS], newcols[DISASM_COLUMNS];
                    334:        int show, shown, active;
                    335:        const char *symbol;
                    336:        cpu_profile_item_t *data;
                    337:        Uint32 idx, end, size;
                    338:        uaecptr nextpc, addr;
                    339: 
                    340:        data = cpu_profile.data;
                    341:        if (!data) {
                    342:                fprintf(stderr, "ERROR: no CPU profiling data available!\n");
                    343:                return 0;
                    344:        }
                    345: 
                    346:        size = cpu_profile.size;
                    347:        active = cpu_profile.active;
                    348:        if (upper) {
                    349:                end = address2index(upper);
                    350:                show = active;
                    351:                if (end > size) {
                    352:                        end = size;
                    353:                }
                    354:        } else {
                    355:                end = size;
                    356:                show = ConfigureParams.Debugger.nDisasmLines;
                    357:                if (!show || show > active) {
                    358:                        show = active;
                    359:                }
                    360:        }
                    361: 
                    362:        /* get/change columns */
                    363:        Disasm_GetColumns(oldcols);
                    364:        Disasm_DisableColumn(DISASM_COLUMN_HEXDUMP, oldcols, newcols);
                    365:        Disasm_SetColumns(newcols);
                    366: 
1.1.1.3 ! root      367:        fputs("# disassembly with profile data: <instructions percentage>% (<sum of instructions>, <sum of cycles>, <sum of i-cache misses>, <sum of d-cache hits>)\n", out);
1.1       root      368: 
                    369:        nextpc = 0;
                    370:        idx = address2index(lower);
                    371:        for (shown = 0; shown < show && idx < end; idx++) {
                    372:                if (!data[idx].count) {
                    373:                        continue;
                    374:                }
                    375:                addr = index2address(idx);
                    376:                if (addr != nextpc && nextpc) {
                    377:                        fprintf(out, "[...]\n");
                    378:                }
                    379:                symbol = Symbols_GetByCpuAddress(addr);
                    380:                if (symbol) {
                    381:                        fprintf(out, "%s:\n", symbol);
                    382:                }
                    383:                /* NOTE: column setup works only with 68kDisass disasm engine! */
                    384:                Disasm(out, addr, &nextpc, 1);
                    385:                shown++;
                    386:        }
                    387:        printf("Disassembled %d (of active %d) CPU addresses.\n", shown, active);
                    388: 
                    389:        /* restore disassembly columns */
                    390:        Disasm_SetColumns(oldcols);
                    391:        return nextpc;
                    392: }
                    393: 
                    394: /**
                    395:  * remove all disassembly columns except instruction ones.
                    396:  * data needed to restore columns is stored to "oldcols"
                    397:  */
                    398: static void leave_instruction_column(int *oldcols)
                    399: {
                    400:        int i, newcols[DISASM_COLUMNS];
                    401: 
                    402:        Disasm_GetColumns(oldcols);
                    403:        for (i = 0; i < DISASM_COLUMNS; i++) {
                    404:                if (i == DISASM_COLUMN_OPCODE || i == DISASM_COLUMN_OPERAND) {
                    405:                        continue;
                    406:                }
                    407:                Disasm_DisableColumn(i, oldcols, newcols);
                    408:                oldcols = newcols;
                    409:        }
                    410:        Disasm_SetColumns(newcols);
                    411: }
                    412: 
                    413: #if ENABLE_WINUAE_CPU
                    414: /**
                    415:  * compare function for qsort() to sort CPU profile data by instruction cache misses.
                    416:  */
1.1.1.3 ! root      417: static int cmp_cpu_i_misses(const void *p1, const void *p2)
1.1       root      418: {
1.1.1.3 ! root      419:        Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].i_misses;
        !           420:        Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].i_misses;
1.1       root      421:        if (count1 > count2) {
                    422:                return -1;
                    423:        }
                    424:        if (count1 < count2) {
                    425:                return 1;
                    426:        }
                    427:        return 0;
                    428: }
                    429: 
                    430: /**
                    431:  * Sort CPU profile data addresses by instruction cache misses and show the results.
                    432:  */
1.1.1.3 ! root      433: void Profile_CpuShowInstrMisses(int show)
        !           434: {
        !           435:        int active;
        !           436:        int oldcols[DISASM_COLUMNS];
        !           437:        Uint32 *sort_arr, *end, addr, nextpc;
        !           438:        cpu_profile_item_t *data = cpu_profile.data;
        !           439:        float percentage;
        !           440:        Uint32 count;
        !           441: 
        !           442:        if (!cpu_profile.all.i_misses) {
        !           443:                fprintf(stderr, "No CPU instruction cache miss information available.\n");
        !           444:                return;
        !           445:        }
        !           446: 
        !           447:        active = cpu_profile.active;
        !           448:        sort_arr = cpu_profile.sort_arr;
        !           449:        qsort(sort_arr, active, sizeof(*sort_arr), cmp_cpu_i_misses);
        !           450: 
        !           451:        leave_instruction_column(oldcols);
        !           452: 
        !           453:        printf("addr:\t\ti-cache misses:\n");
        !           454:        show = (show < active ? show : active);
        !           455:        for (end = sort_arr + show; sort_arr < end; sort_arr++) {
        !           456:                addr = index2address(*sort_arr);
        !           457:                count = data[*sort_arr].i_misses;
        !           458:                percentage = 100.0*count/cpu_profile.all.i_misses;
        !           459:                printf("0x%06x\t%5.2f%%\t%d%s\t", addr, percentage, count,
        !           460:                       count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
        !           461:                Disasm(stdout, addr, &nextpc, 1);
        !           462:        }
        !           463:        printf("%d CPU addresses listed.\n", show);
        !           464: 
        !           465:        Disasm_SetColumns(oldcols);
        !           466: }
        !           467: 
        !           468: /**
        !           469:  * compare function for qsort() to sort CPU profile data by data cache hits.
        !           470:  */
        !           471: static int cmp_cpu_d_hits(const void *p1, const void *p2)
        !           472: {
        !           473:        Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].d_hits;
        !           474:        Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].d_hits;
        !           475:        if (count1 > count2) {
        !           476:                return -1;
        !           477:        }
        !           478:        if (count1 < count2) {
        !           479:                return 1;
        !           480:        }
        !           481:        return 0;
        !           482: }
        !           483: 
        !           484: /**
        !           485:  * Sort CPU profile data addresses by data cache hits and show the results.
        !           486:  */
        !           487: void Profile_CpuShowDataHits(int show)
1.1       root      488: {
                    489:        int active;
                    490:        int oldcols[DISASM_COLUMNS];
                    491:        Uint32 *sort_arr, *end, addr, nextpc;
                    492:        cpu_profile_item_t *data = cpu_profile.data;
                    493:        float percentage;
                    494:        Uint32 count;
                    495: 
1.1.1.3 ! root      496:        if (!cpu_profile.all.d_hits) {
        !           497:                fprintf(stderr, "No CPU data cache hit information available.\n");
1.1       root      498:                return;
                    499:        }
                    500: 
                    501:        active = cpu_profile.active;
                    502:        sort_arr = cpu_profile.sort_arr;
1.1.1.3 ! root      503:        qsort(sort_arr, active, sizeof(*sort_arr), cmp_cpu_d_hits);
1.1       root      504: 
                    505:        leave_instruction_column(oldcols);
                    506: 
1.1.1.3 ! root      507:        printf("addr:\t\td-cache hits:\n");
1.1       root      508:        show = (show < active ? show : active);
                    509:        for (end = sort_arr + show; sort_arr < end; sort_arr++) {
                    510:                addr = index2address(*sort_arr);
1.1.1.3 ! root      511:                count = data[*sort_arr].d_hits;
        !           512:                percentage = 100.0*count/cpu_profile.all.d_hits;
1.1       root      513:                printf("0x%06x\t%5.2f%%\t%d%s\t", addr, percentage, count,
                    514:                       count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
                    515:                Disasm(stdout, addr, &nextpc, 1);
                    516:        }
                    517:        printf("%d CPU addresses listed.\n", show);
                    518: 
                    519:        Disasm_SetColumns(oldcols);
                    520: }
1.1.1.3 ! root      521: 
1.1       root      522: #else
/* variant for builds without WinUAE CPU core, only tells why there is no data */
void Profile_CpuShowInstrMisses(int show)
{
	fputs("Cache information is recorded only with WinUAE CPU.\n", stderr);
}
        !           526: void Profile_CpuShowDataHits(int show) {
        !           527:        fprintf(stderr, "Cache information is recorded only with WinUAE CPU.\n");
1.1       root      528: }
                    529: #endif
                    530: 
                    531: 
                    532: /**
                    533:  * compare function for qsort() to sort CPU profile data by cycles counts.
                    534:  */
                    535: static int cmp_cpu_cycles(const void *p1, const void *p2)
                    536: {
                    537:        Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].cycles;
                    538:        Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].cycles;
                    539:        if (count1 > count2) {
                    540:                return -1;
                    541:        }
                    542:        if (count1 < count2) {
                    543:                return 1;
                    544:        }
                    545:        return 0;
                    546: }
                    547: 
                    548: /**
                    549:  * Sort CPU profile data addresses by cycle counts and show the results.
                    550:  */
                    551: void Profile_CpuShowCycles(int show)
                    552: {
                    553:        int active;
                    554:        int oldcols[DISASM_COLUMNS];
                    555:        Uint32 *sort_arr, *end, addr, nextpc;
                    556:        cpu_profile_item_t *data = cpu_profile.data;
                    557:        float percentage;
                    558:        Uint32 count;
                    559: 
                    560:        if (!data) {
                    561:                fprintf(stderr, "ERROR: no CPU profiling data available!\n");
                    562:                return;
                    563:        }
                    564: 
                    565:        active = cpu_profile.active;
                    566:        sort_arr = cpu_profile.sort_arr;
                    567:        qsort(sort_arr, active, sizeof(*sort_arr), cmp_cpu_cycles);
                    568: 
                    569:        leave_instruction_column(oldcols);
                    570: 
                    571:        printf("addr:\t\tcycles:\n");
                    572:        show = (show < active ? show : active);
                    573:        for (end = sort_arr + show; sort_arr < end; sort_arr++) {
                    574:                addr = index2address(*sort_arr);
                    575:                count = data[*sort_arr].cycles;
                    576:                percentage = 100.0*count/cpu_profile.all.cycles;
                    577:                printf("0x%06x\t%5.2f%%\t%d%s\t", addr, percentage, count,
                    578:                       count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
                    579:                Disasm(stdout, addr, &nextpc, 1);
                    580:        }
                    581:        printf("%d CPU addresses listed.\n", show);
                    582: 
                    583:        Disasm_SetColumns(oldcols);
                    584: }
                    585: 
                    586: /**
                    587:  * compare function for qsort() to sort CPU profile data by descending
                    588:  * address access counts.
                    589:  */
                    590: static int cmp_cpu_count(const void *p1, const void *p2)
                    591: {
                    592:        Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].count;
                    593:        Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].count;
                    594:        if (count1 > count2) {
                    595:                return -1;
                    596:        }
                    597:        if (count1 < count2) {
                    598:                return 1;
                    599:        }
                    600:        return 0;
                    601: }
                    602: 
                    603: /**
                    604:  * Sort CPU profile data addresses by call counts and show the results.
                    605:  * If symbols are requested and symbols are loaded, show (only) addresses
                    606:  * matching a symbol.
                    607:  */
                    608: void Profile_CpuShowCounts(int show, bool only_symbols)
                    609: {
                    610:        cpu_profile_item_t *data = cpu_profile.data;
                    611:        int symbols, matched, active;
                    612:        int oldcols[DISASM_COLUMNS];
                    613:        Uint32 *sort_arr, *end, addr, nextpc;
                    614:        const char *name;
                    615:        float percentage;
                    616:        Uint32 count;
                    617: 
                    618:        if (!data) {
                    619:                fprintf(stderr, "ERROR: no CPU profiling data available!\n");
                    620:                return;
                    621:        }
                    622:        active = cpu_profile.active;
                    623:        show = (show < active ? show : active);
                    624: 
                    625:        sort_arr = cpu_profile.sort_arr;
                    626:        qsort(sort_arr, active, sizeof(*sort_arr), cmp_cpu_count);
                    627: 
                    628:        if (!only_symbols) {
                    629:                leave_instruction_column(oldcols);
                    630:                printf("addr:\t\tcount:\n");
                    631:                for (end = sort_arr + show; sort_arr < end; sort_arr++) {
                    632:                        addr = index2address(*sort_arr);
                    633:                        count = data[*sort_arr].count;
                    634:                        percentage = 100.0*count/cpu_profile.all.count;
                    635:                        printf("0x%06x\t%5.2f%%\t%d%s\t",
                    636:                               addr, percentage, count,
                    637:                               count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
                    638:                        Disasm(stdout, addr, &nextpc, 1);
                    639:                }
                    640:                printf("%d CPU addresses listed.\n", show);
                    641:                Disasm_SetColumns(oldcols);
                    642:                return;
                    643:        }
                    644: 
                    645:        symbols = Symbols_CpuCount();
                    646:        if (!symbols) {
                    647:                fprintf(stderr, "ERROR: no CPU symbols loaded!\n");
                    648:                return;
                    649:        }
                    650:        matched = 0;    
                    651: 
                    652:        leave_instruction_column(oldcols);
                    653: 
                    654:        printf("addr:\t\tcount:\t\tsymbol:\n");
                    655:        for (end = sort_arr + active; sort_arr < end; sort_arr++) {
                    656: 
                    657:                addr = index2address(*sort_arr);
                    658:                name = Symbols_GetByCpuAddress(addr);
                    659:                if (!name) {
                    660:                        continue;
                    661:                }
                    662:                count = data[*sort_arr].count;
                    663:                percentage = 100.0*count/cpu_profile.all.count;
                    664:                printf("0x%06x\t%5.2f%%\t%d\t%s%s\t",
                    665:                       addr, percentage, count, name,
                    666:                       count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
                    667:                Disasm(stdout, addr, &nextpc, 1);
                    668: 
                    669:                matched++;
                    670:                if (matched >= show || matched >= symbols) {
                    671:                        break;
                    672:                }
                    673:        }
                    674:        printf("%d CPU symbols listed.\n", matched);
                    675: 
                    676:        Disasm_SetColumns(oldcols);
                    677: }
                    678: 
                    679: 
                    680: static const char * addr2name(Uint32 addr, Uint64 *total)
                    681: {
                    682:        Uint32 idx = address2index(addr);
                    683:        *total = cpu_profile.data[idx].count;
                    684:        return Symbols_GetByCpuAddress(addr);
                    685: }
                    686: 
                    687: /**
                    688:  * Output CPU callers info to given file.
                    689:  */
                    690: void Profile_CpuShowCallers(FILE *fp)
                    691: {
                    692:        Profile_ShowCallers(fp, cpu_callinfo.sites, cpu_callinfo.site, addr2name);
                    693: }
                    694: 
                    695: /**
                    696:  * Save CPU profile information to given file.
                    697:  */
                    698: void Profile_CpuSave(FILE *out)
                    699: {
1.1.1.3 ! root      700:        Uint32 text, end;
        !           701:        fputs("Field names:\tExecuted instructions, Used cycles, Instruction cache misses, Data cache hits\n", out);
        !           702:        /* (Python) regexp that matches address and all described fields from disassembly:
        !           703:         * $<hex>  :  <ASM>  <percentage>% (<count>, <cycles>, <i-misses>, <d-hits>)
        !           704:         * $e5af38 :   rts           0.00% (12, 0, 12, 0)
1.1       root      705:         */
                    706:        fputs("Field regexp:\t^\\$([0-9a-f]+) :.*% \\((.*)\\)$\n", out);
                    707:        /* some information for interpreting the addresses */
1.1.1.3 ! root      708:        fprintf(out, "ST_RAM:\t\t0x%06x-0x%06x\n", 0, STRamEnd);
        !           709:        end = TosAddress + TosSize;
        !           710:        fprintf(out, "ROM_TOS:\t0x%06x-0x%06x\n", TosAddress, end);
        !           711:        fprintf(out, "CARTRIDGE:\t0x%06x-0x%06x\n", CART_START, CART_END);
1.1       root      712:        text = DebugInfo_GetTEXT();
1.1.1.3 ! root      713:        if (text && (text < TosAddress || text >= TTRAM_START)) {
1.1       root      714:                fprintf(out, "PROGRAM_TEXT:\t0x%06x-0x%06x\n", text, DebugInfo_GetTEXTEnd());
                    715:        }
1.1.1.3 ! root      716:        if (TTmemory && ConfigureParams.Memory.nTTRamSize) {
        !           717:                end = TTRAM_START + 1024*1024*ConfigureParams.Memory.nTTRamSize;
        !           718:                fprintf(out, "TT_RAM:\t\t0x%08x-0x%08x\n", TTRAM_START, end);
        !           719:        } else if (end < CART_END) {
        !           720:                end = CART_END;
        !           721:        }
        !           722:        Profile_CpuShowAddresses(0, end-2, out);
1.1       root      723:        Profile_CpuShowCallers(out);
                    724: }
                    725: 
                    726: /* ------------------ CPU profile control ----------------- */
                    727: 
                    728: /**
                    729:  * Initialize CPU profiling when necessary.  Return true if profiling.
                    730:  */
                    731: bool Profile_CpuStart(void)
                    732: {
                    733:        int size;
                    734: 
                    735:        Profile_FreeCallinfo(&(cpu_callinfo));
                    736:        if (cpu_profile.sort_arr) {
                    737:                /* remove previous results */
                    738:                free(cpu_profile.sort_arr);
                    739:                free(cpu_profile.data);
                    740:                cpu_profile.sort_arr = NULL;
                    741:                cpu_profile.data = NULL;
                    742:                printf("Freed previous CPU profile buffers.\n");
                    743:        }
                    744:        if (!cpu_profile.enabled) {
                    745:                return false;
                    746:        }
                    747:        /* zero everything */
                    748:        memset(&cpu_profile, 0, sizeof(cpu_profile));
                    749: 
                    750:        /* Shouldn't change within same debug session */
1.1.1.3 ! root      751:        size = (STRamEnd + CART_SIZE + TosSize) / 2;
        !           752:        if (TTmemory && ConfigureParams.Memory.nTTRamSize) {
        !           753:                size += ConfigureParams.Memory.nTTRamSize * 1024*1024/2;
        !           754:        }
1.1       root      755: 
                    756:        /* Add one entry for catching invalid PC values */
                    757:        cpu_profile.data = calloc(size + 1, sizeof(*cpu_profile.data));
                    758:        if (!cpu_profile.data) {
                    759:                perror("ERROR, new CPU profile buffer alloc failed");
                    760:                return false;
                    761:        }
                    762:        printf("Allocated CPU profile buffer (%d MB).\n",
                    763:               (int)sizeof(*cpu_profile.data)*size/(1024*1024));
                    764:        cpu_profile.size = size;
                    765: 
                    766:        Profile_AllocCallinfo(&(cpu_callinfo), Symbols_CpuCount(), "CPU");
                    767: 
                    768:        /* special hack for EmuTOS */
                    769:        etos_switcher = PC_UNDEFINED;
                    770:        if (cpu_callinfo.sites && bIsEmuTOS &&
                    771:            (!Symbols_GetCpuAddress(SYMTYPE_TEXT, "_switchto", &etos_switcher) || etos_switcher < TosAddress)) {
                    772:                etos_switcher = PC_UNDEFINED;
                    773:        }
                    774: 
1.1.1.3 ! root      775:        /* reset cache stats (CPU emulation doesn't do that) */
        !           776:        CpuInstruction.D_Cache_hit = 0;
        !           777:        CpuInstruction.I_Cache_hit = 0;
        !           778:        CpuInstruction.I_Cache_miss = 0;
        !           779:        CpuInstruction.D_Cache_miss = 0;
1.1       root      780: 
1.1.1.3 ! root      781:        cpu_profile.prev_cycles = CyclesGlobalClockCounter;
        !           782:        cpu_profile.prev_family = OpcodeFamily;
        !           783:        cpu_profile.prev_pc = M68000_GetPC();
        !           784:        if (ConfigureParams.System.bAddressSpace24) {
        !           785:                cpu_profile.prev_pc &= 0xffffff;
        !           786:        }
1.1.1.2   root      787:        cpu_profile.loop_start = PC_UNDEFINED;
                    788:        cpu_profile.loop_end = PC_UNDEFINED;
                    789:        cpu_profile.loop_count = 0;
                    790:        Profile_LoopReset();
                    791: 
1.1       root      792:        cpu_profile.disasm_addr = 0;
                    793:        cpu_profile.processed = false;
                    794:        cpu_profile.enabled = true;
                    795:        return cpu_profile.enabled;
                    796: }
                    797: 
                    798: /**
                    799:  * return true if pc could be next instruction for previous pc
                    800:  */
                    801: static bool is_prev_instr(Uint32 prev_pc, Uint32 pc)
                    802: {
                    803:        /* just moved to next instruction (1-2 words)? */
                    804:        if (prev_pc < pc && (pc - prev_pc) <= 10) {
                    805:                return true;
                    806:        }
                    807:        return false;
                    808: }
                    809: 
                    810: /**
                    811:  * return caller instruction type classification
                    812:  */
                    813: static calltype_t cpu_opcode_type(int family, Uint32 prev_pc, Uint32 pc)
                    814: {
                    815:        switch (family) {
                    816: 
                    817:        case i_JSR:
                    818:        case i_BSR:
                    819:                return CALL_SUBROUTINE;
                    820: 
                    821:        case i_RTS:
                    822:        case i_RTR:
                    823:        case i_RTD:
                    824:                return CALL_SUBRETURN;
                    825: 
                    826:        case i_JMP:     /* often used also for "inlined" function calls... */
                    827:        case i_Bcc:     /* both BRA & BCC */
                    828:        case i_FBcc:
                    829:        case i_DBcc:
                    830:        case i_FDBcc:
                    831:                return CALL_BRANCH;
                    832: 
                    833:        case i_TRAP:
                    834:        case i_TRAPV:
                    835:        case i_TRAPcc:
                    836:        case i_FTRAPcc:
                    837:        case i_STOP:
                    838:        case i_ILLG:
                    839:        case i_CHK:
                    840:        case i_CHK2:
                    841:        case i_BKPT:
                    842:                return CALL_EXCEPTION;
                    843: 
                    844:        case i_RTE:
                    845:                return CALL_EXCRETURN;
                    846:        }
                    847:        /* just moved to next instruction? */
                    848:        if (is_prev_instr(prev_pc, pc)) {
                    849:                return CALL_NEXT;
                    850:        }
                    851:        return CALL_UNKNOWN;
                    852: }
                    853: 
                    854: /**
                    855:  * If call tracking is enabled (there are symbols), collect
                    856:  * information about subroutine and other calls, and their costs.
                    857:  * 
                    858:  * Like with profile data, caller info checks need to be for previous
                    859:  * instruction, that's why "pc" argument for this function actually
                    860:  * needs to be previous PC.
                    861:  */
                    862: static void collect_calls(Uint32 pc, counters_t *counters)
                    863: {
                    864:        calltype_t flag;
                    865:        int idx, family;
                    866:        Uint32 prev_pc, caller_pc;
                    867: 
                    868:        family = cpu_profile.prev_family;
                    869:        cpu_profile.prev_family = OpcodeFamily;
                    870: 
                    871:        prev_pc = cpu_callinfo.prev_pc;
                    872:        cpu_callinfo.prev_pc = pc;
                    873:        caller_pc = PC_UNDEFINED;
                    874: 
                    875:        /* address is return address for last subroutine call? */
                    876:        if (unlikely(pc == cpu_callinfo.return_pc) && likely(cpu_callinfo.depth)) {
                    877: 
                    878:                flag = cpu_opcode_type(family, prev_pc, pc);
1.1.1.2   root      879:                /* previous address can be exception return (e.g. RTE) instead of RTS,
                    880:                 * if exception occurred right after returning from subroutine call.
1.1       root      881:                 */
                    882:                if (likely(flag == CALL_SUBRETURN || flag == CALL_EXCRETURN)) {
                    883:                        caller_pc = Profile_CallEnd(&cpu_callinfo, counters);
                    884:                } else {
                    885: #if DEBUG
                    886:                        /* although at return address, it didn't return yet,
                    887:                         * e.g. because there was a jsr or jump to return address
                    888:                         */
                    889:                        Uint32 nextpc;
1.1.1.2   root      890:                        fprintf(stderr, "WARNING: subroutine call returned 0x%x -> 0x%x, not through RTS!\n", prev_pc, pc);
1.1       root      891:                        Disasm(stderr, prev_pc, &nextpc, 1);
                    892: #endif
                    893:                }
1.1.1.2   root      894:                /* next address might be another symbol, so need to fall through */
1.1       root      895:        }
                    896: 
                    897:        /* address is one which we're tracking? */
                    898:        idx = Symbols_GetCpuAddressIndex(pc);
                    899:        if (unlikely(idx >= 0)) {
                    900: 
                    901:                flag = cpu_opcode_type(family, prev_pc, pc);
1.1.1.2   root      902:                if (flag == CALL_SUBROUTINE || flag == CALL_EXCEPTION) {
1.1       root      903:                        /* special HACK for for EmuTOS AES switcher which
                    904:                         * changes stack content to remove itself from call
                    905:                         * stack and uses RTS for subroutine *calls*, not
                    906:                         * for returning from them.
                    907:                         *
                    908:                         * It wouldn't be reliable to detect calls from it,
                    909:                         * so I'm making call *to* it show up as branch, to
                    910:                         * keep callstack depth correct.
                    911:                         */
                    912:                        if (unlikely(pc == etos_switcher)) {
                    913:                                flag = CALL_BRANCH;
                    914:                        } else if (unlikely(prev_pc == PC_UNDEFINED)) {
                    915:                                /* if first profiled instruction
                    916:                                 * is subroutine call, it doesn't have
                    917:                                 * valid prev_pc value stored
                    918:                                 */
                    919:                                cpu_callinfo.return_pc = PC_UNDEFINED;
1.1.1.3 ! root      920:                                fprintf(stderr, "WARNING: previous PC for tracked address 0x%d is undefined!\n", pc);
1.1       root      921: #if DEBUG
                    922:                                skip_assert = true;
                    923:                                DebugUI(REASON_CPU_EXCEPTION);
                    924: #endif
                    925:                        } else {
                    926:                                /* slow! */
                    927:                                cpu_callinfo.return_pc = Disasm_GetNextPC(prev_pc);
                    928:                        }
                    929:                } else if (caller_pc != PC_UNDEFINED) {
1.1.1.2   root      930:                        /* returned from function to first instruction of another symbol:
1.1       root      931:                         *      0xf384  jsr some_function
                    932:                         *      other_symbol:
                    933:                         *      0f3x8a  some_instruction
                    934:                         * -> change return instruction address to
                    935:                         *    address of what did the returned call.
                    936:                         */
                    937:                        prev_pc = caller_pc;
                    938:                        assert(is_prev_instr(prev_pc, pc));
                    939:                        flag = CALL_NEXT;
                    940:                }
                    941:                Profile_CallStart(idx, &cpu_callinfo, prev_pc, flag, pc, counters);
                    942:        }
                    943: }
                    944: 
                    945: /**
1.1.1.2   root      946:  * log last loop info, if there's suitable data for one
                    947:  */
                    948: static void log_last_loop(void)
                    949: {
                    950:        unsigned len = cpu_profile.loop_end - cpu_profile.loop_start;
                    951:        if (cpu_profile.loop_count > 1 && (len < profile_loop.cpu_limit || !profile_loop.cpu_limit)) {
                    952:                fprintf(profile_loop.fp, "CPU %d 0x%06x %d %d\n", nVBLs,
                    953:                        cpu_profile.loop_start, len, cpu_profile.loop_count);
                    954:        }
                    955: }
                    956: 
#if DEBUG || ENABLE_WINUAE_CPU
/**
 * Warn to stderr about named value being outside of its expected
 * range, with Disasm() output for both prev_pc and pc.  In DEBUG
 * builds debugger is also invoked.
 *
 * Returns largest accepted value (limit - 1), which callers can
 * use in place of the too large one.
 */
static Uint32 warn_too_large(const char *name, const int value, const int limit, const Uint32 prev_pc, const Uint32 pc)
{
	const int largest = limit - 1;
	Uint32 ignored;	/* next PC from Disasm() isn't needed */

	fprintf(stderr, "WARNING: unexpected (%d > %d) %s at 0x%x:\n", value, largest, name, pc);
	Disasm(stderr, prev_pc, &ignored, 1);
	Disasm(stderr, pc, &ignored, 1);
#if DEBUG
	skip_assert = true;
	DebugUI(REASON_CPU_EXCEPTION);
#endif
	return largest;
}
#endif
        !           974: 
1.1.1.2   root      975: /**
1.1       root      976:  * Update CPU cycle and count statistics for PC address.
                    977:  *
                    978:  * This gets called after instruction has executed and PC
                    979:  * has advanced to next instruction.
                    980:  */
                    981: void Profile_CpuUpdate(void)
                    982: {
                    983:        counters_t *counters = &(cpu_profile.all);
1.1.1.3 ! root      984:        Uint32 pc, prev_pc, idx, cycles;
1.1       root      985:        cpu_profile_item_t *prev;
1.1.1.3 ! root      986: #if ENABLE_WINUAE_CPU
        !           987:        Uint32 i_hits, d_hits, i_misses, d_misses;
        !           988: #else
        !           989:        const Uint32 i_misses = 0, d_hits = 0;
        !           990: #endif
1.1       root      991: 
                    992:        prev_pc = cpu_profile.prev_pc;
1.1.1.3 ! root      993:        /* PC may have extra bits when using 24 bit addressing, they need to be masked away as
1.1       root      994:         * emulation itself does that too when PC value is used
                    995:         */
1.1.1.3 ! root      996:        cpu_profile.prev_pc = pc = M68000_GetPC();
        !           997:        if (ConfigureParams.System.bAddressSpace24) {
        !           998:                cpu_profile.prev_pc &= 0xffffff;
        !           999:        }
1.1.1.2   root     1000:        if (unlikely(profile_loop.fp)) {
                   1001:                if (pc < prev_pc) {
                   1002:                        if (pc == cpu_profile.loop_start && prev_pc == cpu_profile.loop_end) {
                   1003:                                cpu_profile.loop_count++;
                   1004:                        } else {
                   1005:                                cpu_profile.loop_start = pc;
                   1006:                                cpu_profile.loop_end = prev_pc;
                   1007:                                cpu_profile.loop_count = 1;
                   1008:                        }
                   1009:                } else {
                   1010:                        if (pc > cpu_profile.loop_end) {
                   1011:                                log_last_loop();
1.1.1.3 ! root     1012:                                cpu_profile.loop_end = 0xffffffff;
1.1.1.2   root     1013:                                cpu_profile.loop_count = 0;
                   1014:                        }
                   1015:                }
                   1016:        }
                   1017: 
1.1       root     1018:        idx = address2index(prev_pc);
                   1019:        assert(idx <= cpu_profile.size);
                   1020:        prev = cpu_profile.data + idx;
                   1021: 
                   1022:        if (likely(prev->count < MAX_CPU_PROFILE_VALUE)) {
                   1023:                prev->count++;
                   1024:        }
                   1025: 
1.1.1.3 ! root     1026:        cycles = CyclesGlobalClockCounter - cpu_profile.prev_cycles;
        !          1027:        cpu_profile.prev_cycles = CyclesGlobalClockCounter;
1.1       root     1028: 
                   1029:        if (likely(prev->cycles < MAX_CPU_PROFILE_VALUE - cycles)) {
                   1030:                prev->cycles += cycles;
                   1031:        } else {
                   1032:                prev->cycles = MAX_CPU_PROFILE_VALUE;
                   1033:        }
                   1034: 
                   1035: #if ENABLE_WINUAE_CPU
1.1.1.3 ! root     1036:        /* only WinUAE CPU core provides cache information */
        !          1037:        i_hits = CpuInstruction.I_Cache_hit;
        !          1038:        d_hits = CpuInstruction.D_Cache_hit;
        !          1039:        i_misses = CpuInstruction.I_Cache_miss;
        !          1040:        d_misses = CpuInstruction.D_Cache_miss;
        !          1041: 
        !          1042:        /* reset cache stats after reading them (for the next instruction) */
        !          1043:        CpuInstruction.I_Cache_hit = 0;
        !          1044:        CpuInstruction.D_Cache_hit = 0;
        !          1045:        CpuInstruction.I_Cache_miss = 0;
        !          1046:        CpuInstruction.D_Cache_miss = 0;
        !          1047: 
        !          1048:        /* tracked for every address */
        !          1049:        if (likely(prev->i_misses < MAX_CPU_PROFILE_VALUE - i_misses)) {
        !          1050:                prev->i_misses += i_misses;
1.1       root     1051:        } else {
1.1.1.3 ! root     1052:                prev->i_misses = MAX_CPU_PROFILE_VALUE;
1.1       root     1053:        }
1.1.1.3 ! root     1054:        if (likely(prev->d_hits < MAX_CPU_PROFILE_VALUE - d_hits)) {
        !          1055:                prev->d_hits += d_hits;
        !          1056:        } else {
        !          1057:                prev->d_hits = MAX_CPU_PROFILE_VALUE;
        !          1058:        }
        !          1059: 
        !          1060:        /* tracking for histogram, check for array overflows */
        !          1061:        if (unlikely(i_hits >= MAX_I_HITS)) {
        !          1062:                i_hits = warn_too_large("number of CPU instruction cache hits", i_hits, MAX_I_HITS, prev_pc, pc);
        !          1063:        }
        !          1064:        cpu_profile.i_hit_counts[i_hits]++;
        !          1065: 
        !          1066:        if (unlikely(i_misses >= MAX_I_MISSES)) {
        !          1067:                i_misses = warn_too_large("number of CPU instruction cache misses", i_misses, MAX_I_MISSES, prev_pc, pc);
        !          1068:        }
        !          1069:        cpu_profile.i_miss_counts[i_misses]++;
        !          1070: 
        !          1071:        if (unlikely(d_hits >= MAX_D_HITS)) {
        !          1072:                d_hits = warn_too_large("number of CPU data cache hits", d_hits, MAX_D_HITS, prev_pc, pc);
        !          1073:        }
        !          1074:        cpu_profile.d_hit_counts[d_hits]++;
        !          1075: 
        !          1076:        if (unlikely(d_misses >= MAX_D_MISSES)) {
        !          1077:                d_misses = warn_too_large("number of CPU data cache misses", d_misses, MAX_D_MISSES, prev_pc, pc);
        !          1078:        }
        !          1079:        cpu_profile.d_miss_counts[d_misses]++;
1.1       root     1080: #endif
1.1.1.3 ! root     1081: 
1.1       root     1082:        if (cpu_callinfo.sites) {
                   1083:                collect_calls(prev_pc, counters);
                   1084:        }
                   1085:        /* counters are increased after caller info is processed,
                   1086:         * otherwise cost for the instruction calling the callee
                   1087:         * doesn't get accounted to caller (but callee).
                   1088:         */
                   1089:        counters->count++;
1.1.1.3 ! root     1090:        counters->cycles += cycles;
        !          1091:        counters->i_misses += i_misses;
        !          1092:        counters->d_hits += d_hits;
1.1       root     1093: 
                   1094: #if DEBUG
                   1095:        if (unlikely(OpcodeFamily == 0)) {
                   1096:                Uint32 nextpc;
                   1097:                fputs("WARNING: instruction opcode family is zero (=i_ILLG) for instruction:\n", stderr);
                   1098:                Disasm(stderr, prev_pc, &nextpc, 1);
                   1099:        }
                   1100:        /* catch too large (and negative) cycles for other than STOP instruction */
                   1101:        if (unlikely(cycles > 512 && OpcodeFamily != i_STOP)) {
1.1.1.3 ! root     1102:                warn_too_large("cycles", cycles, 512, prev_pc, pc);
1.1       root     1103:        }
1.1.1.3 ! root     1104: # if !ENABLE_WINUAE_CPU
        !          1105:        {
        !          1106:                static Uint32 prev_cycles = 0, prev_pc2 = 0;
        !          1107:                if (unlikely(cycles == 0 && prev_cycles == 0)) {
        !          1108:                        Uint32 nextpc;
        !          1109:                        fputs("WARNING: Zero cycles for successive opcodes:\n", stderr);
        !          1110:                        Disasm(stderr, prev_pc2, &nextpc, 1);
        !          1111:                        Disasm(stderr, prev_pc, &nextpc, 1);
        !          1112:                }
        !          1113:                prev_cycles = cycles;
        !          1114:                prev_pc2 = prev_pc;
1.1       root     1115:        }
1.1.1.3 ! root     1116: # endif
1.1       root     1117: #endif
                   1118: }
                   1119: 
                   1120: 
                   1121: /**
                   1122:  * Helper for accounting CPU profile area item.
                   1123:  */
                   1124: static void update_area_item(profile_area_t *area, Uint32 addr, cpu_profile_item_t *item)
                   1125: {
                   1126:        Uint32 cycles = item->cycles;
                   1127:        Uint32 count = item->count;
                   1128: 
                   1129:        if (!count) {
                   1130:                return;
                   1131:        }
                   1132:        area->counters.count += count;
                   1133:        area->counters.cycles += cycles;
1.1.1.3 ! root     1134:        area->counters.i_misses += item->i_misses;
        !          1135:        area->counters.d_hits += item->d_hits;
1.1       root     1136: 
                   1137:        if (cycles == MAX_CPU_PROFILE_VALUE) {
                   1138:                area->overflow = true;
                   1139:        }
                   1140:        if (addr < area->lowest) {
                   1141:                area->lowest = addr;
                   1142:        }
                   1143:        area->highest = addr;
                   1144: 
                   1145:        area->active++;
                   1146: }
                   1147: 
                   1148: /**
                   1149:  * Helper for collecting CPU profile area statistics.
                   1150:  */
                   1151: static Uint32 update_area(profile_area_t *area, Uint32 start, Uint32 end)
                   1152: {
                   1153:        cpu_profile_item_t *item;
                   1154:        Uint32 addr;
                   1155: 
                   1156:        memset(area, 0, sizeof(profile_area_t));
                   1157:        area->lowest = cpu_profile.size;
                   1158: 
                   1159:        item = &(cpu_profile.data[start]);
                   1160:        for (addr = start; addr < end; addr++, item++) {
                   1161:                update_area_item(area, addr, item);
                   1162:        }
                   1163:        return addr;
                   1164: }
                   1165: 
                   1166: /**
                   1167:  * Helper for initializing CPU profile area sorting indexes.
                   1168:  */
                   1169: static Uint32* index_area(profile_area_t *area, Uint32 *sort_arr)
                   1170: {
                   1171:        cpu_profile_item_t *item;
                   1172:        Uint32 addr;
                   1173: 
                   1174:        item = &(cpu_profile.data[area->lowest]);
                   1175:        for (addr = area->lowest; addr <= area->highest; addr++, item++) {
                   1176:                if (item->count) {
                   1177:                        *sort_arr++ = addr;
                   1178:                }
                   1179:        }
                   1180:        return sort_arr;
                   1181: }
                   1182: 
                   1183: /**
                   1184:  * Stop and process the CPU profiling data; collect stats and
                   1185:  * prepare for more optimal sorting.
                   1186:  */
                   1187: void Profile_CpuStop(void)
                   1188: {
                   1189:        Uint32 *sort_arr, next;
1.1.1.3 ! root     1190:        unsigned int size, stsize;
1.1       root     1191:        int active;
                   1192: 
                   1193:        if (cpu_profile.processed || !cpu_profile.enabled) {
                   1194:                return;
                   1195:        }
1.1.1.2   root     1196: 
                   1197:        log_last_loop();
                   1198:        if (profile_loop.fp) {
                   1199:                fflush(profile_loop.fp);
                   1200:        }
                   1201: 
1.1       root     1202:        /* user didn't change RAM or TOS size in the meanwhile? */
1.1.1.3 ! root     1203:        size = stsize = (STRamEnd + CART_SIZE + TosSize) / 2;
        !          1204:        if (TTmemory && ConfigureParams.Memory.nTTRamSize) {
        !          1205:                size += ConfigureParams.Memory.nTTRamSize * 1024*1024/2;
        !          1206:        }
        !          1207:        assert(cpu_profile.size == size);
1.1       root     1208: 
                   1209:        Profile_FinalizeCalls(&(cpu_callinfo), &(cpu_profile.all), Symbols_GetByCpuAddress);
                   1210: 
                   1211:        /* find lowest and highest addresses executed etc */
                   1212:        next = update_area(&cpu_profile.ram, 0, STRamEnd/2);
1.1.1.3 ! root     1213:        if (TosAddress < CART_START) {
        !          1214:                next = update_area(&cpu_profile.tos, next, (STRamEnd + TosSize)/2);
        !          1215:                next = update_area(&cpu_profile.rom, next, stsize);
        !          1216:        } else {
        !          1217:                next = update_area(&cpu_profile.rom, next, (STRamEnd + CART_SIZE)/2);
        !          1218:                next = update_area(&cpu_profile.tos, next, stsize);
        !          1219:        }
        !          1220:        next = update_area(&cpu_profile.ttram, next, size);
        !          1221:        assert(next == size);
1.1       root     1222: 
                   1223: #if DEBUG
                   1224:        if (skip_assert) {
                   1225:                skip_assert = false;
                   1226:        } else
                   1227: #endif
                   1228:        {
1.1.1.3 ! root     1229: #if DEBUG
        !          1230:                if (cpu_profile.all.count != cpu_profile.ttram.counters.count + cpu_profile.ram.counters.count + cpu_profile.tos.counters.count + cpu_profile.rom.counters.count) {
        !          1231:                        fprintf(stderr, "ERROR, instruction count mismatch:\n\t%"PRIu64" != %"PRIu64" + %"PRIu64" + %"PRIu64" + %"PRIu64"?\n",
        !          1232:                                cpu_profile.all.count, cpu_profile.ttram.counters.count, cpu_profile.ram.counters.count,
        !          1233:                                cpu_profile.tos.counters.count, cpu_profile.rom.counters.count);
        !          1234:                        fprintf(stderr, "If there was debugger invocation from profiling before this, try with profiler DEBUG define disabled!!!\n");
        !          1235:                }
        !          1236: #endif
        !          1237:                assert(cpu_profile.all.count == cpu_profile.ttram.counters.count + cpu_profile.ram.counters.count + cpu_profile.tos.counters.count + cpu_profile.rom.counters.count);
        !          1238:                assert(cpu_profile.all.cycles == cpu_profile.ttram.counters.cycles + cpu_profile.ram.counters.cycles + cpu_profile.tos.counters.cycles + cpu_profile.rom.counters.cycles);
        !          1239:                assert(cpu_profile.all.i_misses == cpu_profile.ttram.counters.i_misses + cpu_profile.ram.counters.i_misses + cpu_profile.tos.counters.i_misses + cpu_profile.rom.counters.i_misses);
        !          1240:                assert(cpu_profile.all.d_hits == cpu_profile.ttram.counters.d_hits + cpu_profile.ram.counters.d_hits + cpu_profile.tos.counters.d_hits + cpu_profile.rom.counters.d_hits);
1.1       root     1241:        }
                   1242: 
                   1243:        /* allocate address array for sorting */
1.1.1.3 ! root     1244:        active = cpu_profile.ttram.active + cpu_profile.ram.active + cpu_profile.rom.active + cpu_profile.tos.active;
1.1       root     1245:        sort_arr = calloc(active, sizeof(*sort_arr));
                   1246: 
                   1247:        if (!sort_arr) {
                   1248:                perror("ERROR: allocating CPU profile address data");
                   1249:                free(cpu_profile.data);
                   1250:                cpu_profile.data = NULL;
                   1251:                return;
                   1252:        }
                   1253:        printf("Allocated CPU profile address buffer (%d KB).\n",
                   1254:               (int)sizeof(*sort_arr)*(active+512)/1024);
                   1255:        cpu_profile.sort_arr = sort_arr;
                   1256:        cpu_profile.active = active;
                   1257: 
                   1258:        /* and fill addresses for used instructions... */
                   1259:        sort_arr = index_area(&cpu_profile.ram, sort_arr);
                   1260:        sort_arr = index_area(&cpu_profile.tos, sort_arr);
                   1261:        sort_arr = index_area(&cpu_profile.rom, sort_arr);
1.1.1.3 ! root     1262:        sort_arr = index_area(&cpu_profile.ttram, sort_arr);
1.1       root     1263:        assert(sort_arr == cpu_profile.sort_arr + cpu_profile.active);
                   1264:        //printf("%d/%d/%d\n", area->active, sort_arr-cpu_profile.sort_arr, active);
                   1265: 
                   1266:        Profile_CpuShowStats();
                   1267:        cpu_profile.processed = true;
                   1268: }
                   1269: 
                   1270: /**
                   1271:  * Get pointers to CPU profile enabling and disasm address variables
                   1272:  * for updating them (in parser).
                   1273:  */
                   1274: void Profile_CpuGetPointers(bool **enabled, Uint32 **disasm_addr)
                   1275: {
                   1276:        *disasm_addr = &cpu_profile.disasm_addr;
                   1277:        *enabled = &cpu_profile.enabled;
                   1278: }
                   1279: 
                   1280: /**
                   1281:  * Get callinfo & symbol search pointers for stack walking.
                   1282:  */
                   1283: void Profile_CpuGetCallinfo(callinfo_t **callinfo, const char* (**get_symbol)(Uint32))
                   1284: {
                   1285:        *callinfo = &(cpu_callinfo);
                   1286:        *get_symbol = Symbols_GetByCpuAddress;
                   1287: }

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.