Annotation of hatari/src/debug/profilecpu.c, revision 1.1.1.2

1.1       root        1: /*
                      2:  * Hatari - profilecpu.c
                      3:  * 
                      4:  * Copyright (C) 2010-2013 by Eero Tamminen
                      5:  *
                      6:  * This file is distributed under the GNU General Public License, version 2
                      7:  * or at your option any later version. Read the file gpl.txt for details.
                      8:  *
                      9:  * profilecpu.c - functions for profiling CPU and showing the results.
                     10:  */
                     11: const char Profilecpu_fileid[] = "Hatari profilecpu.c : " __DATE__ " " __TIME__;
                     12: 
                     13: #include <stdio.h>
                     14: #include <inttypes.h>
                     15: #include <assert.h>
                     16: #include "main.h"
                     17: #include "configuration.h"
                     18: #include "clocks_timings.h"
                     19: #include "debugInfo.h"
                     20: #include "dsp.h"
                     21: #include "m68000.h"
                     22: #include "68kDisass.h"
                     23: #include "profile.h"
                     24: #include "profile_priv.h"
                     25: #include "stMemory.h"
                     26: #include "symbols.h"
                     27: #include "tos.h"
1.1.1.2 ! root       28: #include "screen.h"
        !            29: #include "video.h"
        !            30: 
        !            31: 
        !            32: /* cartridge area */
        !            33: #define CART_START     0xFA0000
        !            34: #define CART_END       0xFC0000
        !            35: #define CART_SIZE      (CART_END - CART_START)
        !            36: 
1.1       root       37: 
                     38: /* if non-zero, output (more) warnings on suspicious:
                     39:  * - cycle/instruction counts
                     40:  * - PC switches
                     41:  * And drop to debugger on invalid PC addresses.
                     42:  */
                     43: #define DEBUG 0
                     44: #if DEBUG
                     45: #include "debugui.h"
                     46: static bool skip_assert;
                     47: #endif
                     48: 
                     49: static callinfo_t cpu_callinfo;
                     50: 
                     51: /* This is relevant with WinUAE CPU core:
                     52:  * - the default cycle exact variant needs this define to be non-zero
                     53:  * - non-cycle exact and MMU variants need this define to be 0
                     54:  *   for cycle counts to make any sense
                     55:  */
                     56: #define USE_CYCLES_COUNTER 1
                     57: 
                     58: #define MAX_CPU_PROFILE_VALUE 0xFFFFFFFF
                     59: 
                     60: typedef struct {
                     61:        Uint32 count;   /* how many times this address instrcution is executed */
                     62:        Uint32 cycles;  /* how many CPU cycles was taken at this address */
                     63:        Uint32 misses;  /* how many CPU cache misses happened at this address */
                     64: } cpu_profile_item_t;
                     65: 
                     66: #define MAX_MISS 4
                     67: 
                     68: static struct {
                     69:        counters_t all;       /* total counts for all areas */
                     70:        Uint32 miss_counts[MAX_MISS];  /* cache miss counts */
                     71:        cpu_profile_item_t *data; /* profile data items */
                     72:        Uint32 size;          /* number of allocated profile data items */
                     73:        profile_area_t ram;   /* normal RAM stats */
                     74:        profile_area_t rom;   /* cartridge ROM stats */
                     75:        profile_area_t tos;   /* ROM TOS stats */
                     76:        int active;           /* number of active data items in all areas */
                     77:        Uint32 *sort_arr;     /* data indexes used for sorting */
1.1.1.2 ! root       78:        int prev_family;      /* previous instruction opcode family */
1.1       root       79:        Uint32 prev_cycles;   /* previous instruction cycles counter */
                     80:        Uint32 prev_pc;       /* previous instruction address */
1.1.1.2 ! root       81:        Uint32 loop_start;    /* address of last loop start */
        !            82:        Uint32 loop_end;      /* address of last loop end */
        !            83:        Uint32 loop_count;    /* how many times it was looped */
1.1       root       84:        Uint32 disasm_addr;   /* 'addresses' command start address */
                     85:        bool processed;       /* true when data is already processed */
                     86:        bool enabled;         /* true when profiling enabled */
                     87: } cpu_profile;
                     88: 
                     89: /* special hack for EmuTOS */
                     90: static Uint32 etos_switcher;
                     91: 
                     92: 
                     93: /* ------------------ CPU profile address mapping ----------------- */
                     94: 
                     95: /**
                     96:  * convert Atari memory address to sorting array profile data index.
                     97:  */
                     98: static inline Uint32 address2index(Uint32 pc)
                     99: {
                    100:        if (unlikely(pc & 1)) {
                    101:                fprintf(stderr, "WARNING: odd CPU profile instruction address 0x%x!\n", pc);
                    102: #if DEBUG
                    103:                skip_assert = true;
                    104:                DebugUI(REASON_CPU_EXCEPTION);
                    105: #endif
                    106:        }
1.1.1.2 ! root      107:        if (pc < STRamEnd) {
        !           108:                /* most likely case, use RAM address as-is */
        !           109: 
        !           110:        } else if (pc >= TosAddress && pc < TosAddress + TosSize) {
1.1       root      111:                /* TOS, put it after RAM data */
                    112:                pc = pc - TosAddress + STRamEnd;
1.1.1.2 ! root      113:                if (TosAddress >= CART_END) {
        !           114:                        /* and after cartridge data as it's higher */
        !           115:                        pc += CART_SIZE;
        !           116:                }
        !           117:        } else if (pc >= CART_START && pc < CART_END) {
        !           118:                /* ROM, put it after RAM data */
        !           119:                pc = pc - CART_START + STRamEnd;
        !           120:                if (TosAddress < CART_START) {
        !           121:                        /* and after TOS as it's higher */
        !           122:                        pc += TosSize;
        !           123:                }
1.1       root      124:        } else {
1.1.1.2 ! root      125:                fprintf(stderr, "WARNING: 'invalid' CPU PC profile instruction address 0x%x!\n", pc);
        !           126:                /* extra entry at end is reserved for invalid PC values */
        !           127:                pc = STRamEnd + TosSize + 0x20000;
1.1       root      128: #if DEBUG
1.1.1.2 ! root      129:                skip_assert = true;
        !           130:                DebugUI(REASON_CPU_EXCEPTION);
1.1       root      131: #endif
                    132:        }
                    133:        /* CPU instructions are at even addresses, save space by halving */
                    134:        return (pc >> 1);
                    135: }
                    136: 
                    137: /**
                    138:  * convert sorting array profile data index to Atari memory address.
                    139:  */
                    140: static Uint32 index2address(Uint32 idx)
                    141: {
                    142:        idx <<= 1;
                    143:        /* RAM */
                    144:        if (idx < STRamEnd) {
                    145:                return idx;
                    146:        }
                    147:        idx -= STRamEnd;
1.1.1.2 ! root      148:        /* TOS before cartridge area? */
        !           149:        if (TosAddress < CART_START) {
        !           150:                /* TOS */
        !           151:                if (idx < TosSize) {
        !           152:                        return idx + TosAddress;
        !           153:                }
        !           154:                idx -= TosSize;
        !           155:                /* ROM */
        !           156:                return idx + CART_START;
        !           157:        } else {
        !           158:                /* ROM */
        !           159:                if (idx < CART_SIZE) {
        !           160:                        return idx + CART_START;
        !           161:                }
        !           162:                idx -= CART_SIZE;
        !           163:                /* TOS */
1.1       root      164:                return idx + TosAddress;
                    165:        }
                    166: }
                    167: 
                    168: /* ------------------ CPU profile results ----------------- */
                    169: 
                    170: /**
                    171:  * Get CPU cycles, count and count percentage for given address.
                    172:  * Return true if data was available and non-zero, false otherwise.
                    173:  */
                    174: bool Profile_CpuAddressData(Uint32 addr, float *percentage, Uint32 *count, Uint32 *cycles, Uint32 *misses)
                    175: {
                    176:        Uint32 idx;
                    177:        if (!cpu_profile.data) {
                    178:                return false;
                    179:        }
                    180:        idx = address2index(addr);
                    181:        *misses = cpu_profile.data[idx].misses;
                    182:        *cycles = cpu_profile.data[idx].cycles;
                    183:        *count = cpu_profile.data[idx].count;
                    184:        if (cpu_profile.all.count) {
                    185:                *percentage = 100.0*(*count)/cpu_profile.all.count;
                    186:        } else {
                    187:                *percentage = 0.0;
                    188:        }
                    189:        return (*count > 0);
                    190: }
                    191: 
                    192: /**
                    193:  * Helper to show statistics for specified CPU profile area.
                    194:  */
                    195: static void show_cpu_area_stats(profile_area_t *area)
                    196: {
                    197:        if (!area->active) {
                    198:                fprintf(stderr, "- no activity\n");
                    199:                return;
                    200:        }
                    201:        fprintf(stderr, "- active address range:\n  0x%06x-0x%06x\n",
                    202:                index2address(area->lowest),
                    203:                index2address(area->highest));
                    204:        fprintf(stderr, "- active instruction addresses:\n  %d (%.2f%% of all)\n",
                    205:                area->active,
                    206:                100.0 * area->active / cpu_profile.active);
                    207:        fprintf(stderr, "- executed instructions:\n  %"PRIu64" (%.2f%% of all)\n",
                    208:                area->counters.count,
                    209:                100.0 * area->counters.count / cpu_profile.all.count);
                    210: #if ENABLE_WINUAE_CPU
                    211:        if (cpu_profile.all.misses) {   /* CPU cache in use? */
                    212:                fprintf(stderr, "- instruction cache misses:\n  %"PRIu64" (%.2f%% of all)\n",
                    213:                        area->counters.misses,
                    214:                        100.0 * area->counters.misses / cpu_profile.all.misses);
                    215:        }
                    216: #endif
                    217:        fprintf(stderr, "- used cycles:\n  %"PRIu64" (%.2f%% of all)\n  = %.5fs\n",
                    218:                area->counters.cycles,
                    219:                100.0 * area->counters.cycles / cpu_profile.all.cycles,
                    220:                (double)area->counters.cycles / MachineClocks.CPU_Freq);
                    221:        if (area->overflow) {
                    222:                fprintf(stderr, "  *** COUNTER OVERFLOW! ***\n");
                    223:        }
                    224: }
                    225: 
                    226: 
                    227: /**
                    228:  * show CPU area (RAM, ROM, TOS) specific statistics.
                    229:  */
                    230: void Profile_CpuShowStats(void)
                    231: {
                    232:        fprintf(stderr, "Normal RAM (0-0x%X):\n", STRamEnd);
                    233:        show_cpu_area_stats(&cpu_profile.ram);
                    234: 
                    235:        fprintf(stderr, "ROM TOS (0x%X-0x%X):\n", TosAddress, TosAddress + TosSize);
                    236:        show_cpu_area_stats(&cpu_profile.tos);
                    237: 
1.1.1.2 ! root      238:        fprintf(stderr, "Cartridge ROM (0x%X-%X):\n", CART_START, CART_END);
1.1       root      239:        show_cpu_area_stats(&cpu_profile.rom);
                    240: 
                    241:        fprintf(stderr, "\n= %.5fs\n",
                    242:                (double)cpu_profile.all.cycles / MachineClocks.CPU_Freq);
                    243: 
                    244: #if ENABLE_WINUAE_CPU
                    245:        if (cpu_profile.all.misses) {   /* CPU cache in use? */
                    246:                int i;
                    247:                fprintf(stderr, "\nCache misses per instruction, number of occurrences:\n");
                    248:                for (i = 0; i < MAX_MISS; i++) {
                    249:                        fprintf(stderr, "- %d: %d\n", i, cpu_profile.miss_counts[i]);
                    250:                }
                    251:        }
                    252: #endif
                    253: }
                    254: 
                    255: /**
                    256:  * Show CPU instructions which execution was profiled, in the address order,
                    257:  * starting from the given address.  Return next disassembly address.
                    258:  */
                    259: Uint32 Profile_CpuShowAddresses(Uint32 lower, Uint32 upper, FILE *out)
                    260: {
                    261:        int oldcols[DISASM_COLUMNS], newcols[DISASM_COLUMNS];
                    262:        int show, shown, active;
                    263:        const char *symbol;
                    264:        cpu_profile_item_t *data;
                    265:        Uint32 idx, end, size;
                    266:        uaecptr nextpc, addr;
                    267: 
                    268:        data = cpu_profile.data;
                    269:        if (!data) {
                    270:                fprintf(stderr, "ERROR: no CPU profiling data available!\n");
                    271:                return 0;
                    272:        }
                    273: 
                    274:        size = cpu_profile.size;
                    275:        active = cpu_profile.active;
                    276:        if (upper) {
                    277:                end = address2index(upper);
                    278:                show = active;
                    279:                if (end > size) {
                    280:                        end = size;
                    281:                }
                    282:        } else {
                    283:                end = size;
                    284:                show = ConfigureParams.Debugger.nDisasmLines;
                    285:                if (!show || show > active) {
                    286:                        show = active;
                    287:                }
                    288:        }
                    289: 
                    290:        /* get/change columns */
                    291:        Disasm_GetColumns(oldcols);
                    292:        Disasm_DisableColumn(DISASM_COLUMN_HEXDUMP, oldcols, newcols);
                    293:        Disasm_SetColumns(newcols);
                    294: 
                    295:        fputs("# disassembly with profile data: <instructions percentage>% (<sum of instructions>, <sum of cycles>, <sum of i-cache misses>)\n", out);
                    296: 
                    297:        nextpc = 0;
                    298:        idx = address2index(lower);
                    299:        for (shown = 0; shown < show && idx < end; idx++) {
                    300:                if (!data[idx].count) {
                    301:                        continue;
                    302:                }
                    303:                addr = index2address(idx);
                    304:                if (addr != nextpc && nextpc) {
                    305:                        fprintf(out, "[...]\n");
                    306:                }
                    307:                symbol = Symbols_GetByCpuAddress(addr);
                    308:                if (symbol) {
                    309:                        fprintf(out, "%s:\n", symbol);
                    310:                }
                    311:                /* NOTE: column setup works only with 68kDisass disasm engine! */
                    312:                Disasm(out, addr, &nextpc, 1);
                    313:                shown++;
                    314:        }
                    315:        printf("Disassembled %d (of active %d) CPU addresses.\n", shown, active);
                    316: 
                    317:        /* restore disassembly columns */
                    318:        Disasm_SetColumns(oldcols);
                    319:        return nextpc;
                    320: }
                    321: 
                    322: /**
                    323:  * remove all disassembly columns except instruction ones.
                    324:  * data needed to restore columns is stored to "oldcols"
                    325:  */
                    326: static void leave_instruction_column(int *oldcols)
                    327: {
                    328:        int i, newcols[DISASM_COLUMNS];
                    329: 
                    330:        Disasm_GetColumns(oldcols);
                    331:        for (i = 0; i < DISASM_COLUMNS; i++) {
                    332:                if (i == DISASM_COLUMN_OPCODE || i == DISASM_COLUMN_OPERAND) {
                    333:                        continue;
                    334:                }
                    335:                Disasm_DisableColumn(i, oldcols, newcols);
                    336:                oldcols = newcols;
                    337:        }
                    338:        Disasm_SetColumns(newcols);
                    339: }
                    340: 
                    341: #if ENABLE_WINUAE_CPU
                    342: /**
                    343:  * compare function for qsort() to sort CPU profile data by instruction cache misses.
                    344:  */
                    345: static int cmp_cpu_misses(const void *p1, const void *p2)
                    346: {
                    347:        Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].misses;
                    348:        Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].misses;
                    349:        if (count1 > count2) {
                    350:                return -1;
                    351:        }
                    352:        if (count1 < count2) {
                    353:                return 1;
                    354:        }
                    355:        return 0;
                    356: }
                    357: 
                    358: /**
                    359:  * Sort CPU profile data addresses by instruction cache misses and show the results.
                    360:  */
                    361: void Profile_CpuShowMisses(int show)
                    362: {
                    363:        int active;
                    364:        int oldcols[DISASM_COLUMNS];
                    365:        Uint32 *sort_arr, *end, addr, nextpc;
                    366:        cpu_profile_item_t *data = cpu_profile.data;
                    367:        float percentage;
                    368:        Uint32 count;
                    369: 
                    370:        if (!cpu_profile.all.misses) {
                    371:                fprintf(stderr, "No CPU cache miss information available.\n");
                    372:                return;
                    373:        }
                    374: 
                    375:        active = cpu_profile.active;
                    376:        sort_arr = cpu_profile.sort_arr;
                    377:        qsort(sort_arr, active, sizeof(*sort_arr), cmp_cpu_misses);
                    378: 
                    379:        leave_instruction_column(oldcols);
                    380: 
                    381:        printf("addr:\t\tmisses:\n");
                    382:        show = (show < active ? show : active);
                    383:        for (end = sort_arr + show; sort_arr < end; sort_arr++) {
                    384:                addr = index2address(*sort_arr);
                    385:                count = data[*sort_arr].misses;
                    386:                percentage = 100.0*count/cpu_profile.all.misses;
                    387:                printf("0x%06x\t%5.2f%%\t%d%s\t", addr, percentage, count,
                    388:                       count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
                    389:                Disasm(stdout, addr, &nextpc, 1);
                    390:        }
                    391:        printf("%d CPU addresses listed.\n", show);
                    392: 
                    393:        Disasm_SetColumns(oldcols);
                    394: }
                    395: #else
                    396: void Profile_CpuShowMisses(int show) {
                    397:        fprintf(stderr, "Cache misses are recorded only with WinUAE CPU.\n");
                    398: }
                    399: #endif
                    400: 
                    401: 
                    402: /**
                    403:  * compare function for qsort() to sort CPU profile data by cycles counts.
                    404:  */
                    405: static int cmp_cpu_cycles(const void *p1, const void *p2)
                    406: {
                    407:        Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].cycles;
                    408:        Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].cycles;
                    409:        if (count1 > count2) {
                    410:                return -1;
                    411:        }
                    412:        if (count1 < count2) {
                    413:                return 1;
                    414:        }
                    415:        return 0;
                    416: }
                    417: 
                    418: /**
                    419:  * Sort CPU profile data addresses by cycle counts and show the results.
                    420:  */
                    421: void Profile_CpuShowCycles(int show)
                    422: {
                    423:        int active;
                    424:        int oldcols[DISASM_COLUMNS];
                    425:        Uint32 *sort_arr, *end, addr, nextpc;
                    426:        cpu_profile_item_t *data = cpu_profile.data;
                    427:        float percentage;
                    428:        Uint32 count;
                    429: 
                    430:        if (!data) {
                    431:                fprintf(stderr, "ERROR: no CPU profiling data available!\n");
                    432:                return;
                    433:        }
                    434: 
                    435:        active = cpu_profile.active;
                    436:        sort_arr = cpu_profile.sort_arr;
                    437:        qsort(sort_arr, active, sizeof(*sort_arr), cmp_cpu_cycles);
                    438: 
                    439:        leave_instruction_column(oldcols);
                    440: 
                    441:        printf("addr:\t\tcycles:\n");
                    442:        show = (show < active ? show : active);
                    443:        for (end = sort_arr + show; sort_arr < end; sort_arr++) {
                    444:                addr = index2address(*sort_arr);
                    445:                count = data[*sort_arr].cycles;
                    446:                percentage = 100.0*count/cpu_profile.all.cycles;
                    447:                printf("0x%06x\t%5.2f%%\t%d%s\t", addr, percentage, count,
                    448:                       count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
                    449:                Disasm(stdout, addr, &nextpc, 1);
                    450:        }
                    451:        printf("%d CPU addresses listed.\n", show);
                    452: 
                    453:        Disasm_SetColumns(oldcols);
                    454: }
                    455: 
                    456: /**
                    457:  * compare function for qsort() to sort CPU profile data by descending
                    458:  * address access counts.
                    459:  */
                    460: static int cmp_cpu_count(const void *p1, const void *p2)
                    461: {
                    462:        Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].count;
                    463:        Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].count;
                    464:        if (count1 > count2) {
                    465:                return -1;
                    466:        }
                    467:        if (count1 < count2) {
                    468:                return 1;
                    469:        }
                    470:        return 0;
                    471: }
                    472: 
                    473: /**
                    474:  * Sort CPU profile data addresses by call counts and show the results.
                    475:  * If symbols are requested and symbols are loaded, show (only) addresses
                    476:  * matching a symbol.
                    477:  */
                    478: void Profile_CpuShowCounts(int show, bool only_symbols)
                    479: {
                    480:        cpu_profile_item_t *data = cpu_profile.data;
                    481:        int symbols, matched, active;
                    482:        int oldcols[DISASM_COLUMNS];
                    483:        Uint32 *sort_arr, *end, addr, nextpc;
                    484:        const char *name;
                    485:        float percentage;
                    486:        Uint32 count;
                    487: 
                    488:        if (!data) {
                    489:                fprintf(stderr, "ERROR: no CPU profiling data available!\n");
                    490:                return;
                    491:        }
                    492:        active = cpu_profile.active;
                    493:        show = (show < active ? show : active);
                    494: 
                    495:        sort_arr = cpu_profile.sort_arr;
                    496:        qsort(sort_arr, active, sizeof(*sort_arr), cmp_cpu_count);
                    497: 
                    498:        if (!only_symbols) {
                    499:                leave_instruction_column(oldcols);
                    500:                printf("addr:\t\tcount:\n");
                    501:                for (end = sort_arr + show; sort_arr < end; sort_arr++) {
                    502:                        addr = index2address(*sort_arr);
                    503:                        count = data[*sort_arr].count;
                    504:                        percentage = 100.0*count/cpu_profile.all.count;
                    505:                        printf("0x%06x\t%5.2f%%\t%d%s\t",
                    506:                               addr, percentage, count,
                    507:                               count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
                    508:                        Disasm(stdout, addr, &nextpc, 1);
                    509:                }
                    510:                printf("%d CPU addresses listed.\n", show);
                    511:                Disasm_SetColumns(oldcols);
                    512:                return;
                    513:        }
                    514: 
                    515:        symbols = Symbols_CpuCount();
                    516:        if (!symbols) {
                    517:                fprintf(stderr, "ERROR: no CPU symbols loaded!\n");
                    518:                return;
                    519:        }
                    520:        matched = 0;    
                    521: 
                    522:        leave_instruction_column(oldcols);
                    523: 
                    524:        printf("addr:\t\tcount:\t\tsymbol:\n");
                    525:        for (end = sort_arr + active; sort_arr < end; sort_arr++) {
                    526: 
                    527:                addr = index2address(*sort_arr);
                    528:                name = Symbols_GetByCpuAddress(addr);
                    529:                if (!name) {
                    530:                        continue;
                    531:                }
                    532:                count = data[*sort_arr].count;
                    533:                percentage = 100.0*count/cpu_profile.all.count;
                    534:                printf("0x%06x\t%5.2f%%\t%d\t%s%s\t",
                    535:                       addr, percentage, count, name,
                    536:                       count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
                    537:                Disasm(stdout, addr, &nextpc, 1);
                    538: 
                    539:                matched++;
                    540:                if (matched >= show || matched >= symbols) {
                    541:                        break;
                    542:                }
                    543:        }
                    544:        printf("%d CPU symbols listed.\n", matched);
                    545: 
                    546:        Disasm_SetColumns(oldcols);
                    547: }
                    548: 
                    549: 
                    550: static const char * addr2name(Uint32 addr, Uint64 *total)
                    551: {
                    552:        Uint32 idx = address2index(addr);
                    553:        *total = cpu_profile.data[idx].count;
                    554:        return Symbols_GetByCpuAddress(addr);
                    555: }
                    556: 
                    557: /**
                    558:  * Output CPU callers info to given file.
                    559:  */
                    560: void Profile_CpuShowCallers(FILE *fp)
                    561: {
                    562:        Profile_ShowCallers(fp, cpu_callinfo.sites, cpu_callinfo.site, addr2name);
                    563: }
                    564: 
                    565: /**
                    566:  * Save CPU profile information to given file.
                    567:  */
                    568: void Profile_CpuSave(FILE *out)
                    569: {
                    570:        Uint32 text;
                    571:        fputs("Field names:\tExecuted instructions, Used cycles, Instruction cache misses\n", out);
                    572:        /* (Python) pegexp that matches address and all describled fields from disassembly:
                    573:         * $<hex>  :  <ASM>  <percentage>% (<count>, <cycles>, <misses>)
                    574:         * $e5af38 :   rts           0.00% (12, 0, 12)
                    575:         */
                    576:        fputs("Field regexp:\t^\\$([0-9a-f]+) :.*% \\((.*)\\)$\n", out);
                    577:        /* some information for interpreting the addresses */
                    578:        fprintf(out, "ROM_TOS:\t0x%06x-0x%06x\n", TosAddress, TosAddress + TosSize);
                    579:        text = DebugInfo_GetTEXT();
                    580:        if (text < TosAddress) {
                    581:                fprintf(out, "PROGRAM_TEXT:\t0x%06x-0x%06x\n", text, DebugInfo_GetTEXTEnd());
                    582:        }
1.1.1.2 ! root      583:        fprintf(out, "CARTRIDGE:\t0x%06x-0x%06x\n", CART_START, CART_END);
        !           584:        Profile_CpuShowAddresses(0, CART_END-2, out);
1.1       root      585:        Profile_CpuShowCallers(out);
                    586: }
                    587: 
                    588: /* ------------------ CPU profile control ----------------- */
                    589: 
                    590: /**
                    591:  * Initialize CPU profiling when necessary.  Return true if profiling.
                    592:  */
                    593: bool Profile_CpuStart(void)
                    594: {
                    595:        int size;
                    596: 
                    597:        Profile_FreeCallinfo(&(cpu_callinfo));
                    598:        if (cpu_profile.sort_arr) {
                    599:                /* remove previous results */
                    600:                free(cpu_profile.sort_arr);
                    601:                free(cpu_profile.data);
                    602:                cpu_profile.sort_arr = NULL;
                    603:                cpu_profile.data = NULL;
                    604:                printf("Freed previous CPU profile buffers.\n");
                    605:        }
                    606:        if (!cpu_profile.enabled) {
                    607:                return false;
                    608:        }
                    609:        /* zero everything */
                    610:        memset(&cpu_profile, 0, sizeof(cpu_profile));
                    611: 
                    612:        /* Shouldn't change within same debug session */
                    613:        size = (STRamEnd + 0x20000 + TosSize) / 2;
                    614: 
                    615:        /* Add one entry for catching invalid PC values */
                    616:        cpu_profile.data = calloc(size + 1, sizeof(*cpu_profile.data));
                    617:        if (!cpu_profile.data) {
                    618:                perror("ERROR, new CPU profile buffer alloc failed");
                    619:                return false;
                    620:        }
                    621:        printf("Allocated CPU profile buffer (%d MB).\n",
                    622:               (int)sizeof(*cpu_profile.data)*size/(1024*1024));
                    623:        cpu_profile.size = size;
                    624: 
                    625:        Profile_AllocCallinfo(&(cpu_callinfo), Symbols_CpuCount(), "CPU");
                    626: 
                    627:        /* special hack for EmuTOS */
                    628:        etos_switcher = PC_UNDEFINED;
                    629:        if (cpu_callinfo.sites && bIsEmuTOS &&
                    630:            (!Symbols_GetCpuAddress(SYMTYPE_TEXT, "_switchto", &etos_switcher) || etos_switcher < TosAddress)) {
                    631:                etos_switcher = PC_UNDEFINED;
                    632:        }
                    633: 
                    634:        cpu_profile.prev_cycles = Cycles_GetCounter(CYCLES_COUNTER_CPU);
                    635:        cpu_profile.prev_family = OpcodeFamily;
                    636:        cpu_profile.prev_pc = M68000_GetPC() & 0xffffff;
                    637: 
1.1.1.2 ! root      638:        cpu_profile.loop_start = PC_UNDEFINED;
        !           639:        cpu_profile.loop_end = PC_UNDEFINED;
        !           640:        cpu_profile.loop_count = 0;
        !           641:        Profile_LoopReset();
        !           642: 
1.1       root      643:        cpu_profile.disasm_addr = 0;
                    644:        cpu_profile.processed = false;
                    645:        cpu_profile.enabled = true;
                    646:        return cpu_profile.enabled;
                    647: }
                    648: 
                    649: /**
                    650:  * return true if pc could be next instruction for previous pc
                    651:  */
                    652: static bool is_prev_instr(Uint32 prev_pc, Uint32 pc)
                    653: {
                    654:        /* just moved to next instruction (1-2 words)? */
                    655:        if (prev_pc < pc && (pc - prev_pc) <= 10) {
                    656:                return true;
                    657:        }
                    658:        return false;
                    659: }
                    660: 
                    661: /**
                    662:  * return caller instruction type classification
                    663:  */
                    664: static calltype_t cpu_opcode_type(int family, Uint32 prev_pc, Uint32 pc)
                    665: {
                    666:        switch (family) {
                    667: 
                    668:        case i_JSR:
                    669:        case i_BSR:
                    670:                return CALL_SUBROUTINE;
                    671: 
                    672:        case i_RTS:
                    673:        case i_RTR:
                    674:        case i_RTD:
                    675:                return CALL_SUBRETURN;
                    676: 
                    677:        case i_JMP:     /* often used also for "inlined" function calls... */
                    678:        case i_Bcc:     /* both BRA & BCC */
                    679:        case i_FBcc:
                    680:        case i_DBcc:
                    681:        case i_FDBcc:
                    682:                return CALL_BRANCH;
                    683: 
                    684:        case i_TRAP:
                    685:        case i_TRAPV:
                    686:        case i_TRAPcc:
                    687:        case i_FTRAPcc:
                    688:        case i_STOP:
                    689:        case i_ILLG:
                    690:        case i_CHK:
                    691:        case i_CHK2:
                    692:        case i_BKPT:
                    693:                return CALL_EXCEPTION;
                    694: 
                    695:        case i_RTE:
                    696:                return CALL_EXCRETURN;
                    697:        }
                    698:        /* just moved to next instruction? */
                    699:        if (is_prev_instr(prev_pc, pc)) {
                    700:                return CALL_NEXT;
                    701:        }
                    702:        return CALL_UNKNOWN;
                    703: }
                    704: 
                    705: /**
                    706:  * If call tracking is enabled (there are symbols), collect
                    707:  * information about subroutine and other calls, and their costs.
                    708:  * 
                    709:  * Like with profile data, caller info checks need to be for previous
                    710:  * instruction, that's why "pc" argument for this function actually
                    711:  * needs to be previous PC.
                    712:  */
                    713: static void collect_calls(Uint32 pc, counters_t *counters)
                    714: {
                    715:        calltype_t flag;
                    716:        int idx, family;
                    717:        Uint32 prev_pc, caller_pc;
                    718: 
                    719:        family = cpu_profile.prev_family;
                    720:        cpu_profile.prev_family = OpcodeFamily;
                    721: 
                    722:        prev_pc = cpu_callinfo.prev_pc;
                    723:        cpu_callinfo.prev_pc = pc;
                    724:        caller_pc = PC_UNDEFINED;
                    725: 
                    726:        /* address is return address for last subroutine call? */
                    727:        if (unlikely(pc == cpu_callinfo.return_pc) && likely(cpu_callinfo.depth)) {
                    728: 
                    729:                flag = cpu_opcode_type(family, prev_pc, pc);
1.1.1.2 ! root      730:                /* previous address can be exception return (e.g. RTE) instead of RTS,
        !           731:                 * if exception occurred right after returning from subroutine call.
1.1       root      732:                 */
                    733:                if (likely(flag == CALL_SUBRETURN || flag == CALL_EXCRETURN)) {
                    734:                        caller_pc = Profile_CallEnd(&cpu_callinfo, counters);
                    735:                } else {
                    736: #if DEBUG
                    737:                        /* although at return address, it didn't return yet,
                    738:                         * e.g. because there was a jsr or jump to return address
                    739:                         */
                    740:                        Uint32 nextpc;
1.1.1.2 ! root      741:                        fprintf(stderr, "WARNING: subroutine call returned 0x%x -> 0x%x, not through RTS!\n", prev_pc, pc);
1.1       root      742:                        Disasm(stderr, prev_pc, &nextpc, 1);
                    743: #endif
                    744:                }
1.1.1.2 ! root      745:                /* next address might be another symbol, so need to fall through */
1.1       root      746:        }
                    747: 
                    748:        /* address is one which we're tracking? */
                    749:        idx = Symbols_GetCpuAddressIndex(pc);
                    750:        if (unlikely(idx >= 0)) {
                    751: 
                    752:                flag = cpu_opcode_type(family, prev_pc, pc);
1.1.1.2 ! root      753:                if (flag == CALL_SUBROUTINE || flag == CALL_EXCEPTION) {
1.1       root      754:                        /* special HACK for for EmuTOS AES switcher which
                    755:                         * changes stack content to remove itself from call
                    756:                         * stack and uses RTS for subroutine *calls*, not
                    757:                         * for returning from them.
                    758:                         *
                    759:                         * It wouldn't be reliable to detect calls from it,
                    760:                         * so I'm making call *to* it show up as branch, to
                    761:                         * keep callstack depth correct.
                    762:                         */
                    763:                        if (unlikely(pc == etos_switcher)) {
                    764:                                flag = CALL_BRANCH;
                    765:                        } else if (unlikely(prev_pc == PC_UNDEFINED)) {
                    766:                                /* if first profiled instruction
                    767:                                 * is subroutine call, it doesn't have
                    768:                                 * valid prev_pc value stored
                    769:                                 */
                    770:                                cpu_callinfo.return_pc = PC_UNDEFINED;
                    771:                                fprintf(stderr, "WARNING: previous PC from callinfo for 0x%d is undefined!\n", pc);
                    772: #if DEBUG
                    773:                                skip_assert = true;
                    774:                                DebugUI(REASON_CPU_EXCEPTION);
                    775: #endif
                    776:                        } else {
                    777:                                /* slow! */
                    778:                                cpu_callinfo.return_pc = Disasm_GetNextPC(prev_pc);
                    779:                        }
                    780:                } else if (caller_pc != PC_UNDEFINED) {
1.1.1.2 ! root      781:                        /* returned from function to first instruction of another symbol:
1.1       root      782:                         *      0xf384  jsr some_function
                    783:                         *      other_symbol:
                    784:                         *      0f3x8a  some_instruction
                    785:                         * -> change return instruction address to
                    786:                         *    address of what did the returned call.
                    787:                         */
                    788:                        prev_pc = caller_pc;
                    789:                        assert(is_prev_instr(prev_pc, pc));
                    790:                        flag = CALL_NEXT;
                    791:                }
                    792:                Profile_CallStart(idx, &cpu_callinfo, prev_pc, flag, pc, counters);
                    793:        }
                    794: }
                    795: 
                    796: /**
1.1.1.2 ! root      797:  * log last loop info, if there's suitable data for one
        !           798:  */
        !           799: static void log_last_loop(void)
        !           800: {
        !           801:        unsigned len = cpu_profile.loop_end - cpu_profile.loop_start;
        !           802:        if (cpu_profile.loop_count > 1 && (len < profile_loop.cpu_limit || !profile_loop.cpu_limit)) {
        !           803:                fprintf(profile_loop.fp, "CPU %d 0x%06x %d %d\n", nVBLs,
        !           804:                        cpu_profile.loop_start, len, cpu_profile.loop_count);
        !           805:        }
        !           806: }
        !           807: 
        !           808: /**
1.1       root      809:  * Update CPU cycle and count statistics for PC address.
                    810:  *
                    811:  * This gets called after instruction has executed and PC
                    812:  * has advanced to next instruction.
                    813:  */
                    814: void Profile_CpuUpdate(void)
                    815: {
                    816:        counters_t *counters = &(cpu_profile.all);
                    817:        Uint32 pc, prev_pc, idx, cycles, misses;
                    818:        cpu_profile_item_t *prev;
                    819: 
                    820:        prev_pc = cpu_profile.prev_pc;
                    821:        /* PC may have extra bits, they need to be masked away as
                    822:         * emulation itself does that too when PC value is used
                    823:         */
                    824:        cpu_profile.prev_pc = pc = M68000_GetPC() & 0xffffff;
                    825: 
1.1.1.2 ! root      826:        if (unlikely(profile_loop.fp)) {
        !           827:                if (pc < prev_pc) {
        !           828:                        if (pc == cpu_profile.loop_start && prev_pc == cpu_profile.loop_end) {
        !           829:                                cpu_profile.loop_count++;
        !           830:                        } else {
        !           831:                                cpu_profile.loop_start = pc;
        !           832:                                cpu_profile.loop_end = prev_pc;
        !           833:                                cpu_profile.loop_count = 1;
        !           834:                        }
        !           835:                } else {
        !           836:                        if (pc > cpu_profile.loop_end) {
        !           837:                                log_last_loop();
        !           838:                                cpu_profile.loop_end = 0xffffffff;                      
        !           839:                                cpu_profile.loop_count = 0;
        !           840:                        }
        !           841:                }
        !           842:        }
        !           843: 
1.1       root      844:        idx = address2index(prev_pc);
                    845:        assert(idx <= cpu_profile.size);
                    846:        prev = cpu_profile.data + idx;
                    847: 
                    848:        if (likely(prev->count < MAX_CPU_PROFILE_VALUE)) {
                    849:                prev->count++;
                    850:        }
                    851: 
                    852: #if USE_CYCLES_COUNTER
                    853:        /* Confusingly, with DSP enabled, cycle counter is for this instruction,
                    854:         * without DSP enabled, it's a monotonically increasing counter.
                    855:         */
                    856:        if (bDspEnabled) {
                    857:                cycles = Cycles_GetCounter(CYCLES_COUNTER_CPU);
                    858:        } else {
                    859:                Uint32 newcycles = Cycles_GetCounter(CYCLES_COUNTER_CPU);
                    860:                cycles = newcycles - cpu_profile.prev_cycles;
                    861:                cpu_profile.prev_cycles = newcycles;
                    862:        }
                    863: #else
                    864:        cycles = CurrentInstrCycles + nWaitStateCycles;
                    865: #endif
                    866:        /* cycles are based on 8Mhz clock, change them to correct one */
                    867:        cycles <<= nCpuFreqShift;
                    868: 
                    869:        if (likely(prev->cycles < MAX_CPU_PROFILE_VALUE - cycles)) {
                    870:                prev->cycles += cycles;
                    871:        } else {
                    872:                prev->cycles = MAX_CPU_PROFILE_VALUE;
                    873:        }
                    874: 
                    875: #if ENABLE_WINUAE_CPU
                    876:        misses = CpuInstruction.iCacheMisses;
                    877:        assert(misses < MAX_MISS);
                    878:        cpu_profile.miss_counts[misses]++;
                    879:        if (likely(prev->misses < MAX_CPU_PROFILE_VALUE - misses)) {
                    880:                prev->misses += misses;
                    881:        } else {
                    882:                prev->misses = MAX_CPU_PROFILE_VALUE;
                    883:        }
                    884: #else
                    885:        misses = 0;
                    886: #endif
                    887:        if (cpu_callinfo.sites) {
                    888:                collect_calls(prev_pc, counters);
                    889:        }
                    890:        /* counters are increased after caller info is processed,
                    891:         * otherwise cost for the instruction calling the callee
                    892:         * doesn't get accounted to caller (but callee).
                    893:         */
                    894:        counters->misses += misses;
                    895:        counters->cycles += cycles;
                    896:        counters->count++;
                    897: 
                    898: #if DEBUG
                    899:        if (unlikely(OpcodeFamily == 0)) {
                    900:                Uint32 nextpc;
                    901:                fputs("WARNING: instruction opcode family is zero (=i_ILLG) for instruction:\n", stderr);
                    902:                Disasm(stderr, prev_pc, &nextpc, 1);
                    903:        }
                    904:        /* catch too large (and negative) cycles for other than STOP instruction */
                    905:        if (unlikely(cycles > 512 && OpcodeFamily != i_STOP)) {
                    906:                Uint32 nextpc;
                    907:                fprintf(stderr, "WARNING: cycles %d > 512:\n", cycles);
                    908:                Disasm(stderr, prev_pc, &nextpc, 1);
                    909:        }
                    910:        if (unlikely(cycles == 0)) {
                    911:                Uint32 nextpc;
                    912:                fputs("WARNING: Zero cycles for an opcode:\n", stderr);
                    913:                Disasm(stderr, prev_pc, &nextpc, 1);
                    914:        }
                    915: #endif
                    916: }
                    917: 
                    918: 
                    919: /**
                    920:  * Helper for accounting CPU profile area item.
                    921:  */
                    922: static void update_area_item(profile_area_t *area, Uint32 addr, cpu_profile_item_t *item)
                    923: {
                    924:        Uint32 cycles = item->cycles;
                    925:        Uint32 count = item->count;
                    926: 
                    927:        if (!count) {
                    928:                return;
                    929:        }
                    930:        area->counters.count += count;
                    931:        area->counters.misses += item->misses;
                    932:        area->counters.cycles += cycles;
                    933: 
                    934:        if (cycles == MAX_CPU_PROFILE_VALUE) {
                    935:                area->overflow = true;
                    936:        }
                    937:        if (addr < area->lowest) {
                    938:                area->lowest = addr;
                    939:        }
                    940:        area->highest = addr;
                    941: 
                    942:        area->active++;
                    943: }
                    944: 
                    945: /**
                    946:  * Helper for collecting CPU profile area statistics.
                    947:  */
                    948: static Uint32 update_area(profile_area_t *area, Uint32 start, Uint32 end)
                    949: {
                    950:        cpu_profile_item_t *item;
                    951:        Uint32 addr;
                    952: 
                    953:        memset(area, 0, sizeof(profile_area_t));
                    954:        area->lowest = cpu_profile.size;
                    955: 
                    956:        item = &(cpu_profile.data[start]);
                    957:        for (addr = start; addr < end; addr++, item++) {
                    958:                update_area_item(area, addr, item);
                    959:        }
                    960:        return addr;
                    961: }
                    962: 
                    963: /**
                    964:  * Helper for initializing CPU profile area sorting indexes.
                    965:  */
                    966: static Uint32* index_area(profile_area_t *area, Uint32 *sort_arr)
                    967: {
                    968:        cpu_profile_item_t *item;
                    969:        Uint32 addr;
                    970: 
                    971:        item = &(cpu_profile.data[area->lowest]);
                    972:        for (addr = area->lowest; addr <= area->highest; addr++, item++) {
                    973:                if (item->count) {
                    974:                        *sort_arr++ = addr;
                    975:                }
                    976:        }
                    977:        return sort_arr;
                    978: }
                    979: 
                    980: /**
                    981:  * Stop and process the CPU profiling data; collect stats and
                    982:  * prepare for more optimal sorting.
                    983:  */
                    984: void Profile_CpuStop(void)
                    985: {
                    986:        Uint32 *sort_arr, next;
                    987:        int active;
                    988: 
                    989:        if (cpu_profile.processed || !cpu_profile.enabled) {
                    990:                return;
                    991:        }
1.1.1.2 ! root      992: 
        !           993:        log_last_loop();
        !           994:        if (profile_loop.fp) {
        !           995:                fflush(profile_loop.fp);
        !           996:        }
        !           997: 
1.1       root      998:        /* user didn't change RAM or TOS size in the meanwhile? */
                    999:        assert(cpu_profile.size == (STRamEnd + 0x20000 + TosSize) / 2);
                   1000: 
                   1001:        Profile_FinalizeCalls(&(cpu_callinfo), &(cpu_profile.all), Symbols_GetByCpuAddress);
                   1002: 
                   1003:        /* find lowest and highest addresses executed etc */
                   1004:        next = update_area(&cpu_profile.ram, 0, STRamEnd/2);
                   1005:        next = update_area(&cpu_profile.tos, next, (STRamEnd + TosSize)/2);
                   1006:        next = update_area(&cpu_profile.rom, next, cpu_profile.size);
                   1007:        assert(next == cpu_profile.size);
                   1008: 
                   1009: #if DEBUG
                   1010:        if (skip_assert) {
                   1011:                skip_assert = false;
                   1012:        } else
                   1013: #endif
                   1014:        {
                   1015:                assert(cpu_profile.all.misses == cpu_profile.ram.counters.misses + cpu_profile.tos.counters.misses + cpu_profile.rom.counters.misses);
                   1016:                assert(cpu_profile.all.cycles == cpu_profile.ram.counters.cycles + cpu_profile.tos.counters.cycles + cpu_profile.rom.counters.cycles);
                   1017:                assert(cpu_profile.all.count == cpu_profile.ram.counters.count + cpu_profile.tos.counters.count + cpu_profile.rom.counters.count);
                   1018:        }
                   1019: 
                   1020:        /* allocate address array for sorting */
                   1021:        active = cpu_profile.ram.active + cpu_profile.rom.active + cpu_profile.tos.active;
                   1022:        sort_arr = calloc(active, sizeof(*sort_arr));
                   1023: 
                   1024:        if (!sort_arr) {
                   1025:                perror("ERROR: allocating CPU profile address data");
                   1026:                free(cpu_profile.data);
                   1027:                cpu_profile.data = NULL;
                   1028:                return;
                   1029:        }
                   1030:        printf("Allocated CPU profile address buffer (%d KB).\n",
                   1031:               (int)sizeof(*sort_arr)*(active+512)/1024);
                   1032:        cpu_profile.sort_arr = sort_arr;
                   1033:        cpu_profile.active = active;
                   1034: 
                   1035:        /* and fill addresses for used instructions... */
                   1036:        sort_arr = index_area(&cpu_profile.ram, sort_arr);
                   1037:        sort_arr = index_area(&cpu_profile.tos, sort_arr);
                   1038:        sort_arr = index_area(&cpu_profile.rom, sort_arr);
                   1039:        assert(sort_arr == cpu_profile.sort_arr + cpu_profile.active);
                   1040:        //printf("%d/%d/%d\n", area->active, sort_arr-cpu_profile.sort_arr, active);
                   1041: 
                   1042:        Profile_CpuShowStats();
                   1043:        cpu_profile.processed = true;
                   1044: }
                   1045: 
                   1046: /**
                   1047:  * Get pointers to CPU profile enabling and disasm address variables
                   1048:  * for updating them (in parser).
                   1049:  */
                   1050: void Profile_CpuGetPointers(bool **enabled, Uint32 **disasm_addr)
                   1051: {
                   1052:        *disasm_addr = &cpu_profile.disasm_addr;
                   1053:        *enabled = &cpu_profile.enabled;
                   1054: }
                   1055: 
                   1056: /**
                   1057:  * Get callinfo & symbol search pointers for stack walking.
                   1058:  */
                   1059: void Profile_CpuGetCallinfo(callinfo_t **callinfo, const char* (**get_symbol)(Uint32))
                   1060: {
                   1061:        *callinfo = &(cpu_callinfo);
                   1062:        *get_symbol = Symbols_GetByCpuAddress;
                   1063: }

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.