Annotation of hatari/src/debug/profilecpu.c, revision 1.1.1.1

1.1       root        1: /*
                      2:  * Hatari - profilecpu.c
                      3:  * 
                      4:  * Copyright (C) 2010-2013 by Eero Tamminen
                      5:  *
                      6:  * This file is distributed under the GNU General Public License, version 2
                      7:  * or at your option any later version. Read the file gpl.txt for details.
                      8:  *
                      9:  * profilecpu.c - functions for profiling CPU and showing the results.
                     10:  */
                     11: const char Profilecpu_fileid[] = "Hatari profilecpu.c : " __DATE__ " " __TIME__;
                     12: 
                     13: #include <stdio.h>
                     14: #include <inttypes.h>
                     15: #include <assert.h>
                     16: #include "main.h"
                     17: #include "configuration.h"
                     18: #include "clocks_timings.h"
                     19: #include "debugInfo.h"
                     20: #include "dsp.h"
                     21: #include "m68000.h"
                     22: #include "68kDisass.h"
                     23: #include "profile.h"
                     24: #include "profile_priv.h"
                     25: #include "stMemory.h"
                     26: #include "symbols.h"
                     27: #include "tos.h"
                     28: 
                     29: /* if non-zero, output (more) warnings on suspicious:
                     30:  * - cycle/instruction counts
                     31:  * - PC switches
                     32:  * And drop to debugger on invalid PC addresses.
                     33:  */
                     34: #define DEBUG 0
                     35: #if DEBUG
                     36: #include "debugui.h"
                     37: static bool skip_assert;
                     38: #endif
                     39: 
                     40: static callinfo_t cpu_callinfo;
                     41: 
                     42: /* This is relevant with WinUAE CPU core:
                     43:  * - the default cycle exact variant needs this define to be non-zero
                     44:  * - non-cycle exact and MMU variants need this define to be 0
                     45:  *   for cycle counts to make any sense
                     46:  */
                     47: #define USE_CYCLES_COUNTER 1
                     48: 
                     49: #define MAX_CPU_PROFILE_VALUE 0xFFFFFFFF
                     50: 
                     51: typedef struct {
                     52:        Uint32 count;   /* how many times the instruction at this address was executed */
                     53:        Uint32 cycles;  /* how many CPU cycles was taken at this address */
                     54:        Uint32 misses;  /* how many CPU cache misses happened at this address */
                     55: } cpu_profile_item_t;
                     56: 
                     57: #define MAX_MISS 4
                     58: 
/*
 * File-scope state of the CPU profiler: overall totals, the per-address
 * profile items with their sorting index array, per-area (RAM / cartridge
 * ROM / TOS) statistics and bookkeeping about the previous instruction.
 */
static struct {
	counters_t all;       /* total counts for all areas */
	Uint32 miss_counts[MAX_MISS];  /* cache miss counts */
	cpu_profile_item_t *data; /* profile data items */
	Uint32 size;          /* number of allocated profile data items */
	profile_area_t ram;   /* normal RAM stats */
	profile_area_t rom;   /* cartridge ROM stats */
	profile_area_t tos;   /* ROM TOS stats */
	int active;           /* number of active data items in all areas */
	Uint32 *sort_arr;     /* data indexes used for sorting */
	Uint32 prev_cycles;   /* previous instruction cycles counter */
	Uint32 prev_pc;       /* previous instruction address */
	int prev_family;      /* previous instruction opcode family */
	Uint32 disasm_addr;   /* 'addresses' command start address */
	bool processed;       /* true when data is already processed */
	bool enabled;         /* true when profiling enabled */
} cpu_profile;
                     76: 
                     77: /* special hack for EmuTOS */
                     78: static Uint32 etos_switcher;
                     79: 
                     80: 
                     81: /* ------------------ CPU profile address mapping ----------------- */
                     82: 
                     83: /**
                     84:  * convert Atari memory address to sorting array profile data index.
                     85:  */
                     86: static inline Uint32 address2index(Uint32 pc)
                     87: {
                     88:        if (unlikely(pc & 1)) {
                     89:                fprintf(stderr, "WARNING: odd CPU profile instruction address 0x%x!\n", pc);
                     90: #if DEBUG
                     91:                skip_assert = true;
                     92:                DebugUI(REASON_CPU_EXCEPTION);
                     93: #endif
                     94:        }
                     95:        if (pc >= TosAddress && pc < TosAddress + TosSize) {
                     96:                /* TOS, put it after RAM data */
                     97:                pc = pc - TosAddress + STRamEnd;
                     98: 
                     99:        } else if (pc >= 0xFA0000 && pc < 0xFC0000) {
                    100:                /* ROM, put it after RAM & TOS data */
                    101:                pc = pc - 0xFA0000 + STRamEnd + TosSize;
                    102: 
                    103:        } else {
                    104:                /* if in RAM, use as-is */
                    105:                if (unlikely(pc >= STRamEnd)) {
                    106:                        fprintf(stderr, "WARNING: 'invalid' CPU PC profile instruction address 0x%x!\n", pc);
                    107:                        /* extra entry at end is reserved for invalid PC values */
                    108:                        pc = STRamEnd + TosSize + 0x20000;
                    109: #if DEBUG
                    110:                        skip_assert = true;
                    111:                        DebugUI(REASON_CPU_EXCEPTION);
                    112: #endif
                    113:                }
                    114:        }
                    115:        /* CPU instructions are at even addresses, save space by halving */
                    116:        return (pc >> 1);
                    117: }
                    118: 
                    119: /**
                    120:  * convert sorting array profile data index to Atari memory address.
                    121:  */
                    122: static Uint32 index2address(Uint32 idx)
                    123: {
                    124:        idx <<= 1;
                    125:        /* RAM */
                    126:        if (idx < STRamEnd) {
                    127:                return idx;
                    128:        }
                    129:        /* TOS */
                    130:        idx -= STRamEnd;
                    131:        if (idx < TosSize) {
                    132:                return idx + TosAddress;
                    133:        }
                    134:        /* ROM */
                    135:        return idx - TosSize + 0xFA0000;
                    136: }
                    137: 
                    138: /* ------------------ CPU profile results ----------------- */
                    139: 
                    140: /**
                    141:  * Get CPU cycles, count and count percentage for given address.
                    142:  * Return true if data was available and non-zero, false otherwise.
                    143:  */
                    144: bool Profile_CpuAddressData(Uint32 addr, float *percentage, Uint32 *count, Uint32 *cycles, Uint32 *misses)
                    145: {
                    146:        Uint32 idx;
                    147:        if (!cpu_profile.data) {
                    148:                return false;
                    149:        }
                    150:        idx = address2index(addr);
                    151:        *misses = cpu_profile.data[idx].misses;
                    152:        *cycles = cpu_profile.data[idx].cycles;
                    153:        *count = cpu_profile.data[idx].count;
                    154:        if (cpu_profile.all.count) {
                    155:                *percentage = 100.0*(*count)/cpu_profile.all.count;
                    156:        } else {
                    157:                *percentage = 0.0;
                    158:        }
                    159:        return (*count > 0);
                    160: }
                    161: 
                    162: /**
                    163:  * Helper to show statistics for specified CPU profile area.
                    164:  */
                    165: static void show_cpu_area_stats(profile_area_t *area)
                    166: {
                    167:        if (!area->active) {
                    168:                fprintf(stderr, "- no activity\n");
                    169:                return;
                    170:        }
                    171:        fprintf(stderr, "- active address range:\n  0x%06x-0x%06x\n",
                    172:                index2address(area->lowest),
                    173:                index2address(area->highest));
                    174:        fprintf(stderr, "- active instruction addresses:\n  %d (%.2f%% of all)\n",
                    175:                area->active,
                    176:                100.0 * area->active / cpu_profile.active);
                    177:        fprintf(stderr, "- executed instructions:\n  %"PRIu64" (%.2f%% of all)\n",
                    178:                area->counters.count,
                    179:                100.0 * area->counters.count / cpu_profile.all.count);
                    180: #if ENABLE_WINUAE_CPU
                    181:        if (cpu_profile.all.misses) {   /* CPU cache in use? */
                    182:                fprintf(stderr, "- instruction cache misses:\n  %"PRIu64" (%.2f%% of all)\n",
                    183:                        area->counters.misses,
                    184:                        100.0 * area->counters.misses / cpu_profile.all.misses);
                    185:        }
                    186: #endif
                    187:        fprintf(stderr, "- used cycles:\n  %"PRIu64" (%.2f%% of all)\n  = %.5fs\n",
                    188:                area->counters.cycles,
                    189:                100.0 * area->counters.cycles / cpu_profile.all.cycles,
                    190:                (double)area->counters.cycles / MachineClocks.CPU_Freq);
                    191:        if (area->overflow) {
                    192:                fprintf(stderr, "  *** COUNTER OVERFLOW! ***\n");
                    193:        }
                    194: }
                    195: 
                    196: 
                    197: /**
                    198:  * show CPU area (RAM, ROM, TOS) specific statistics.
                    199:  */
                    200: void Profile_CpuShowStats(void)
                    201: {
                    202:        fprintf(stderr, "Normal RAM (0-0x%X):\n", STRamEnd);
                    203:        show_cpu_area_stats(&cpu_profile.ram);
                    204: 
                    205:        fprintf(stderr, "ROM TOS (0x%X-0x%X):\n", TosAddress, TosAddress + TosSize);
                    206:        show_cpu_area_stats(&cpu_profile.tos);
                    207: 
                    208:        fprintf(stderr, "Cartridge ROM (0xFA0000-0xFC0000):\n");
                    209:        show_cpu_area_stats(&cpu_profile.rom);
                    210: 
                    211:        fprintf(stderr, "\n= %.5fs\n",
                    212:                (double)cpu_profile.all.cycles / MachineClocks.CPU_Freq);
                    213: 
                    214: #if ENABLE_WINUAE_CPU
                    215:        if (cpu_profile.all.misses) {   /* CPU cache in use? */
                    216:                int i;
                    217:                fprintf(stderr, "\nCache misses per instruction, number of occurrences:\n");
                    218:                for (i = 0; i < MAX_MISS; i++) {
                    219:                        fprintf(stderr, "- %d: %d\n", i, cpu_profile.miss_counts[i]);
                    220:                }
                    221:        }
                    222: #endif
                    223: }
                    224: 
                    225: /**
                    226:  * Show CPU instructions which execution was profiled, in the address order,
                    227:  * starting from the given address.  Return next disassembly address.
                    228:  */
                    229: Uint32 Profile_CpuShowAddresses(Uint32 lower, Uint32 upper, FILE *out)
                    230: {
                    231:        int oldcols[DISASM_COLUMNS], newcols[DISASM_COLUMNS];
                    232:        int show, shown, active;
                    233:        const char *symbol;
                    234:        cpu_profile_item_t *data;
                    235:        Uint32 idx, end, size;
                    236:        uaecptr nextpc, addr;
                    237: 
                    238:        data = cpu_profile.data;
                    239:        if (!data) {
                    240:                fprintf(stderr, "ERROR: no CPU profiling data available!\n");
                    241:                return 0;
                    242:        }
                    243: 
                    244:        size = cpu_profile.size;
                    245:        active = cpu_profile.active;
                    246:        if (upper) {
                    247:                end = address2index(upper);
                    248:                show = active;
                    249:                if (end > size) {
                    250:                        end = size;
                    251:                }
                    252:        } else {
                    253:                end = size;
                    254:                show = ConfigureParams.Debugger.nDisasmLines;
                    255:                if (!show || show > active) {
                    256:                        show = active;
                    257:                }
                    258:        }
                    259: 
                    260:        /* get/change columns */
                    261:        Disasm_GetColumns(oldcols);
                    262:        Disasm_DisableColumn(DISASM_COLUMN_HEXDUMP, oldcols, newcols);
                    263:        Disasm_SetColumns(newcols);
                    264: 
                    265:        fputs("# disassembly with profile data: <instructions percentage>% (<sum of instructions>, <sum of cycles>, <sum of i-cache misses>)\n", out);
                    266: 
                    267:        nextpc = 0;
                    268:        idx = address2index(lower);
                    269:        for (shown = 0; shown < show && idx < end; idx++) {
                    270:                if (!data[idx].count) {
                    271:                        continue;
                    272:                }
                    273:                addr = index2address(idx);
                    274:                if (addr != nextpc && nextpc) {
                    275:                        fprintf(out, "[...]\n");
                    276:                }
                    277:                symbol = Symbols_GetByCpuAddress(addr);
                    278:                if (symbol) {
                    279:                        fprintf(out, "%s:\n", symbol);
                    280:                }
                    281:                /* NOTE: column setup works only with 68kDisass disasm engine! */
                    282:                Disasm(out, addr, &nextpc, 1);
                    283:                shown++;
                    284:        }
                    285:        printf("Disassembled %d (of active %d) CPU addresses.\n", shown, active);
                    286: 
                    287:        /* restore disassembly columns */
                    288:        Disasm_SetColumns(oldcols);
                    289:        return nextpc;
                    290: }
                    291: 
                    292: /**
                    293:  * remove all disassembly columns except instruction ones.
                    294:  * data needed to restore columns is stored to "oldcols"
                    295:  */
                    296: static void leave_instruction_column(int *oldcols)
                    297: {
                    298:        int i, newcols[DISASM_COLUMNS];
                    299: 
                    300:        Disasm_GetColumns(oldcols);
                    301:        for (i = 0; i < DISASM_COLUMNS; i++) {
                    302:                if (i == DISASM_COLUMN_OPCODE || i == DISASM_COLUMN_OPERAND) {
                    303:                        continue;
                    304:                }
                    305:                Disasm_DisableColumn(i, oldcols, newcols);
                    306:                oldcols = newcols;
                    307:        }
                    308:        Disasm_SetColumns(newcols);
                    309: }
                    310: 
                    311: #if ENABLE_WINUAE_CPU
                    312: /**
                    313:  * compare function for qsort() to sort CPU profile data by instruction cache misses.
                    314:  */
                    315: static int cmp_cpu_misses(const void *p1, const void *p2)
                    316: {
                    317:        Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].misses;
                    318:        Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].misses;
                    319:        if (count1 > count2) {
                    320:                return -1;
                    321:        }
                    322:        if (count1 < count2) {
                    323:                return 1;
                    324:        }
                    325:        return 0;
                    326: }
                    327: 
                    328: /**
                    329:  * Sort CPU profile data addresses by instruction cache misses and show the results.
                    330:  */
                    331: void Profile_CpuShowMisses(int show)
                    332: {
                    333:        int active;
                    334:        int oldcols[DISASM_COLUMNS];
                    335:        Uint32 *sort_arr, *end, addr, nextpc;
                    336:        cpu_profile_item_t *data = cpu_profile.data;
                    337:        float percentage;
                    338:        Uint32 count;
                    339: 
                    340:        if (!cpu_profile.all.misses) {
                    341:                fprintf(stderr, "No CPU cache miss information available.\n");
                    342:                return;
                    343:        }
                    344: 
                    345:        active = cpu_profile.active;
                    346:        sort_arr = cpu_profile.sort_arr;
                    347:        qsort(sort_arr, active, sizeof(*sort_arr), cmp_cpu_misses);
                    348: 
                    349:        leave_instruction_column(oldcols);
                    350: 
                    351:        printf("addr:\t\tmisses:\n");
                    352:        show = (show < active ? show : active);
                    353:        for (end = sort_arr + show; sort_arr < end; sort_arr++) {
                    354:                addr = index2address(*sort_arr);
                    355:                count = data[*sort_arr].misses;
                    356:                percentage = 100.0*count/cpu_profile.all.misses;
                    357:                printf("0x%06x\t%5.2f%%\t%d%s\t", addr, percentage, count,
                    358:                       count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
                    359:                Disasm(stdout, addr, &nextpc, 1);
                    360:        }
                    361:        printf("%d CPU addresses listed.\n", show);
                    362: 
                    363:        Disasm_SetColumns(oldcols);
                    364: }
                    365: #else
                    366: void Profile_CpuShowMisses(int show) {
                    367:        fprintf(stderr, "Cache misses are recorded only with WinUAE CPU.\n");
                    368: }
                    369: #endif
                    370: 
                    371: 
                    372: /**
                    373:  * compare function for qsort() to sort CPU profile data by cycles counts.
                    374:  */
                    375: static int cmp_cpu_cycles(const void *p1, const void *p2)
                    376: {
                    377:        Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].cycles;
                    378:        Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].cycles;
                    379:        if (count1 > count2) {
                    380:                return -1;
                    381:        }
                    382:        if (count1 < count2) {
                    383:                return 1;
                    384:        }
                    385:        return 0;
                    386: }
                    387: 
                    388: /**
                    389:  * Sort CPU profile data addresses by cycle counts and show the results.
                    390:  */
                    391: void Profile_CpuShowCycles(int show)
                    392: {
                    393:        int active;
                    394:        int oldcols[DISASM_COLUMNS];
                    395:        Uint32 *sort_arr, *end, addr, nextpc;
                    396:        cpu_profile_item_t *data = cpu_profile.data;
                    397:        float percentage;
                    398:        Uint32 count;
                    399: 
                    400:        if (!data) {
                    401:                fprintf(stderr, "ERROR: no CPU profiling data available!\n");
                    402:                return;
                    403:        }
                    404: 
                    405:        active = cpu_profile.active;
                    406:        sort_arr = cpu_profile.sort_arr;
                    407:        qsort(sort_arr, active, sizeof(*sort_arr), cmp_cpu_cycles);
                    408: 
                    409:        leave_instruction_column(oldcols);
                    410: 
                    411:        printf("addr:\t\tcycles:\n");
                    412:        show = (show < active ? show : active);
                    413:        for (end = sort_arr + show; sort_arr < end; sort_arr++) {
                    414:                addr = index2address(*sort_arr);
                    415:                count = data[*sort_arr].cycles;
                    416:                percentage = 100.0*count/cpu_profile.all.cycles;
                    417:                printf("0x%06x\t%5.2f%%\t%d%s\t", addr, percentage, count,
                    418:                       count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
                    419:                Disasm(stdout, addr, &nextpc, 1);
                    420:        }
                    421:        printf("%d CPU addresses listed.\n", show);
                    422: 
                    423:        Disasm_SetColumns(oldcols);
                    424: }
                    425: 
                    426: /**
                    427:  * compare function for qsort() to sort CPU profile data by descending
                    428:  * address access counts.
                    429:  */
                    430: static int cmp_cpu_count(const void *p1, const void *p2)
                    431: {
                    432:        Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].count;
                    433:        Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].count;
                    434:        if (count1 > count2) {
                    435:                return -1;
                    436:        }
                    437:        if (count1 < count2) {
                    438:                return 1;
                    439:        }
                    440:        return 0;
                    441: }
                    442: 
                    443: /**
                    444:  * Sort CPU profile data addresses by call counts and show the results.
                    445:  * If symbols are requested and symbols are loaded, show (only) addresses
                    446:  * matching a symbol.
                    447:  */
                    448: void Profile_CpuShowCounts(int show, bool only_symbols)
                    449: {
                    450:        cpu_profile_item_t *data = cpu_profile.data;
                    451:        int symbols, matched, active;
                    452:        int oldcols[DISASM_COLUMNS];
                    453:        Uint32 *sort_arr, *end, addr, nextpc;
                    454:        const char *name;
                    455:        float percentage;
                    456:        Uint32 count;
                    457: 
                    458:        if (!data) {
                    459:                fprintf(stderr, "ERROR: no CPU profiling data available!\n");
                    460:                return;
                    461:        }
                    462:        active = cpu_profile.active;
                    463:        show = (show < active ? show : active);
                    464: 
                    465:        sort_arr = cpu_profile.sort_arr;
                    466:        qsort(sort_arr, active, sizeof(*sort_arr), cmp_cpu_count);
                    467: 
                    468:        if (!only_symbols) {
                    469:                leave_instruction_column(oldcols);
                    470:                printf("addr:\t\tcount:\n");
                    471:                for (end = sort_arr + show; sort_arr < end; sort_arr++) {
                    472:                        addr = index2address(*sort_arr);
                    473:                        count = data[*sort_arr].count;
                    474:                        percentage = 100.0*count/cpu_profile.all.count;
                    475:                        printf("0x%06x\t%5.2f%%\t%d%s\t",
                    476:                               addr, percentage, count,
                    477:                               count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
                    478:                        Disasm(stdout, addr, &nextpc, 1);
                    479:                }
                    480:                printf("%d CPU addresses listed.\n", show);
                    481:                Disasm_SetColumns(oldcols);
                    482:                return;
                    483:        }
                    484: 
                    485:        symbols = Symbols_CpuCount();
                    486:        if (!symbols) {
                    487:                fprintf(stderr, "ERROR: no CPU symbols loaded!\n");
                    488:                return;
                    489:        }
                    490:        matched = 0;    
                    491: 
                    492:        leave_instruction_column(oldcols);
                    493: 
                    494:        printf("addr:\t\tcount:\t\tsymbol:\n");
                    495:        for (end = sort_arr + active; sort_arr < end; sort_arr++) {
                    496: 
                    497:                addr = index2address(*sort_arr);
                    498:                name = Symbols_GetByCpuAddress(addr);
                    499:                if (!name) {
                    500:                        continue;
                    501:                }
                    502:                count = data[*sort_arr].count;
                    503:                percentage = 100.0*count/cpu_profile.all.count;
                    504:                printf("0x%06x\t%5.2f%%\t%d\t%s%s\t",
                    505:                       addr, percentage, count, name,
                    506:                       count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
                    507:                Disasm(stdout, addr, &nextpc, 1);
                    508: 
                    509:                matched++;
                    510:                if (matched >= show || matched >= symbols) {
                    511:                        break;
                    512:                }
                    513:        }
                    514:        printf("%d CPU symbols listed.\n", matched);
                    515: 
                    516:        Disasm_SetColumns(oldcols);
                    517: }
                    518: 
                    519: 
                    520: static const char * addr2name(Uint32 addr, Uint64 *total)
                    521: {
                    522:        Uint32 idx = address2index(addr);
                    523:        *total = cpu_profile.data[idx].count;
                    524:        return Symbols_GetByCpuAddress(addr);
                    525: }
                    526: 
                    527: /**
                    528:  * Output CPU callers info to given file.
                    529:  */
                    530: void Profile_CpuShowCallers(FILE *fp)
                    531: {
                    532:        Profile_ShowCallers(fp, cpu_callinfo.sites, cpu_callinfo.site, addr2name);
                    533: }
                    534: 
                    535: /**
                    536:  * Save CPU profile information to given file.
                    537:  */
                    538: void Profile_CpuSave(FILE *out)
                    539: {
                    540:        Uint32 text;
                    541:        fputs("Field names:\tExecuted instructions, Used cycles, Instruction cache misses\n", out);
                    542:        /* (Python) pegexp that matches address and all describled fields from disassembly:
                    543:         * $<hex>  :  <ASM>  <percentage>% (<count>, <cycles>, <misses>)
                    544:         * $e5af38 :   rts           0.00% (12, 0, 12)
                    545:         */
                    546:        fputs("Field regexp:\t^\\$([0-9a-f]+) :.*% \\((.*)\\)$\n", out);
                    547:        /* some information for interpreting the addresses */
                    548:        fprintf(out, "ROM_TOS:\t0x%06x-0x%06x\n", TosAddress, TosAddress + TosSize);
                    549:        text = DebugInfo_GetTEXT();
                    550:        if (text < TosAddress) {
                    551:                fprintf(out, "PROGRAM_TEXT:\t0x%06x-0x%06x\n", text, DebugInfo_GetTEXTEnd());
                    552:        }
                    553:        fprintf(out, "CARTRIDGE:\t0xfa0000-0xfc0000\n");
                    554:        Profile_CpuShowAddresses(0, 0xFC0000-2, out);
                    555:        Profile_CpuShowCallers(out);
                    556: }
                    557: 
                    558: /* ------------------ CPU profile control ----------------- */
                    559: 
                    560: /**
                    561:  * Initialize CPU profiling when necessary.  Return true if profiling.
                    562:  */
                    563: bool Profile_CpuStart(void)
                    564: {
                    565:        int size;
                    566: 
                    567:        Profile_FreeCallinfo(&(cpu_callinfo));
                    568:        if (cpu_profile.sort_arr) {
                    569:                /* remove previous results */
                    570:                free(cpu_profile.sort_arr);
                    571:                free(cpu_profile.data);
                    572:                cpu_profile.sort_arr = NULL;
                    573:                cpu_profile.data = NULL;
                    574:                printf("Freed previous CPU profile buffers.\n");
                    575:        }
                    576:        if (!cpu_profile.enabled) {
                    577:                return false;
                    578:        }
                    579:        /* zero everything */
                    580:        memset(&cpu_profile, 0, sizeof(cpu_profile));
                    581: 
                    582:        /* Shouldn't change within same debug session */
                    583:        size = (STRamEnd + 0x20000 + TosSize) / 2;
                    584: 
                    585:        /* Add one entry for catching invalid PC values */
                    586:        cpu_profile.data = calloc(size + 1, sizeof(*cpu_profile.data));
                    587:        if (!cpu_profile.data) {
                    588:                perror("ERROR, new CPU profile buffer alloc failed");
                    589:                return false;
                    590:        }
                    591:        printf("Allocated CPU profile buffer (%d MB).\n",
                    592:               (int)sizeof(*cpu_profile.data)*size/(1024*1024));
                    593:        cpu_profile.size = size;
                    594: 
                    595:        Profile_AllocCallinfo(&(cpu_callinfo), Symbols_CpuCount(), "CPU");
                    596: 
                    597:        /* special hack for EmuTOS */
                    598:        etos_switcher = PC_UNDEFINED;
                    599:        if (cpu_callinfo.sites && bIsEmuTOS &&
                    600:            (!Symbols_GetCpuAddress(SYMTYPE_TEXT, "_switchto", &etos_switcher) || etos_switcher < TosAddress)) {
                    601:                etos_switcher = PC_UNDEFINED;
                    602:        }
                    603: 
                    604:        cpu_profile.prev_cycles = Cycles_GetCounter(CYCLES_COUNTER_CPU);
                    605:        cpu_profile.prev_family = OpcodeFamily;
                    606:        cpu_profile.prev_pc = M68000_GetPC() & 0xffffff;
                    607: 
                    608:        cpu_profile.disasm_addr = 0;
                    609:        cpu_profile.processed = false;
                    610:        cpu_profile.enabled = true;
                    611:        return cpu_profile.enabled;
                    612: }
                    613: 
                    614: /**
                    615:  * return true if pc could be next instruction for previous pc
                    616:  */
                    617: static bool is_prev_instr(Uint32 prev_pc, Uint32 pc)
                    618: {
                    619:        /* just moved to next instruction (1-2 words)? */
                    620:        if (prev_pc < pc && (pc - prev_pc) <= 10) {
                    621:                return true;
                    622:        }
                    623:        return false;
                    624: }
                    625: 
                    626: /**
                    627:  * return caller instruction type classification
                    628:  */
                    629: static calltype_t cpu_opcode_type(int family, Uint32 prev_pc, Uint32 pc)
                    630: {
                    631:        switch (family) {
                    632: 
                    633:        case i_JSR:
                    634:        case i_BSR:
                    635:                return CALL_SUBROUTINE;
                    636: 
                    637:        case i_RTS:
                    638:        case i_RTR:
                    639:        case i_RTD:
                    640:                return CALL_SUBRETURN;
                    641: 
                    642:        case i_JMP:     /* often used also for "inlined" function calls... */
                    643:        case i_Bcc:     /* both BRA & BCC */
                    644:        case i_FBcc:
                    645:        case i_DBcc:
                    646:        case i_FDBcc:
                    647:                return CALL_BRANCH;
                    648: 
                    649:        case i_TRAP:
                    650:        case i_TRAPV:
                    651:        case i_TRAPcc:
                    652:        case i_FTRAPcc:
                    653:        case i_STOP:
                    654:        case i_ILLG:
                    655:        case i_CHK:
                    656:        case i_CHK2:
                    657:        case i_BKPT:
                    658:                return CALL_EXCEPTION;
                    659: 
                    660:        case i_RTE:
                    661:                return CALL_EXCRETURN;
                    662:        }
                    663:        /* just moved to next instruction? */
                    664:        if (is_prev_instr(prev_pc, pc)) {
                    665:                return CALL_NEXT;
                    666:        }
                    667:        return CALL_UNKNOWN;
                    668: }
                    669: 
                    670: /**
                    671:  * If call tracking is enabled (there are symbols), collect
                    672:  * information about subroutine and other calls, and their costs.
                    673:  * 
                    674:  * Like with profile data, caller info checks need to be for previous
                    675:  * instruction, that's why "pc" argument for this function actually
                    676:  * needs to be previous PC.
                    677:  */
                    678: static void collect_calls(Uint32 pc, counters_t *counters)
                    679: {
                    680:        calltype_t flag;
                    681:        int idx, family;
                    682:        Uint32 prev_pc, caller_pc;
                    683: 
                    684:        family = cpu_profile.prev_family;
                    685:        cpu_profile.prev_family = OpcodeFamily;
                    686: 
                    687:        prev_pc = cpu_callinfo.prev_pc;
                    688:        cpu_callinfo.prev_pc = pc;
                    689:        caller_pc = PC_UNDEFINED;
                    690: 
                    691:        /* address is return address for last subroutine call? */
                    692:        if (unlikely(pc == cpu_callinfo.return_pc) && likely(cpu_callinfo.depth)) {
                    693: 
                    694:                flag = cpu_opcode_type(family, prev_pc, pc);
                    695:                /* previous address can be exception return (RTE) if exception
                    696:                 * occurred right after returning from subroutine call (RTS)
                    697:                 */
                    698:                if (likely(flag == CALL_SUBRETURN || flag == CALL_EXCRETURN)) {
                    699:                        caller_pc = Profile_CallEnd(&cpu_callinfo, counters);
                    700:                } else {
                    701: #if DEBUG
                    702:                        /* although at return address, it didn't return yet,
                    703:                         * e.g. because there was a jsr or jump to return address
                    704:                         */
                    705:                        Uint32 nextpc;
                    706:                        fprintf(stderr, "WARNING: subroutine call returned 0x%x -> 0x%x, not though RTS!\n", prev_pc, pc);
                    707:                        Disasm(stderr, prev_pc, &nextpc, 1);
                    708: #endif
                    709:                }
                    710:                /* next address might be another function, so need to fall through */
                    711:        }
                    712: 
                    713:        /* address is one which we're tracking? */
                    714:        idx = Symbols_GetCpuAddressIndex(pc);
                    715:        if (unlikely(idx >= 0)) {
                    716: 
                    717:                flag = cpu_opcode_type(family, prev_pc, pc);
                    718:                if (flag == CALL_SUBROUTINE) {
                    719:                        /* special HACK for for EmuTOS AES switcher which
                    720:                         * changes stack content to remove itself from call
                    721:                         * stack and uses RTS for subroutine *calls*, not
                    722:                         * for returning from them.
                    723:                         *
                    724:                         * It wouldn't be reliable to detect calls from it,
                    725:                         * so I'm making call *to* it show up as branch, to
                    726:                         * keep callstack depth correct.
                    727:                         */
                    728:                        if (unlikely(pc == etos_switcher)) {
                    729:                                flag = CALL_BRANCH;
                    730:                        } else if (unlikely(prev_pc == PC_UNDEFINED)) {
                    731:                                /* if first profiled instruction
                    732:                                 * is subroutine call, it doesn't have
                    733:                                 * valid prev_pc value stored
                    734:                                 */
                    735:                                cpu_callinfo.return_pc = PC_UNDEFINED;
                    736:                                fprintf(stderr, "WARNING: previous PC from callinfo for 0x%d is undefined!\n", pc);
                    737: #if DEBUG
                    738:                                skip_assert = true;
                    739:                                DebugUI(REASON_CPU_EXCEPTION);
                    740: #endif
                    741:                        } else {
                    742:                                /* slow! */
                    743:                                cpu_callinfo.return_pc = Disasm_GetNextPC(prev_pc);
                    744:                        }
                    745:                } else if (caller_pc != PC_UNDEFINED) {
                    746:                        /* returned from function to first instrction of another symbol:
                    747:                         *      0xf384  jsr some_function
                    748:                         *      other_symbol:
                    749:                         *      0f3x8a  some_instruction
                    750:                         * -> change return instruction address to
                    751:                         *    address of what did the returned call.
                    752:                         */
                    753:                        prev_pc = caller_pc;
                    754:                        assert(is_prev_instr(prev_pc, pc));
                    755:                        flag = CALL_NEXT;
                    756:                }
                    757:                Profile_CallStart(idx, &cpu_callinfo, prev_pc, flag, pc, counters);
                    758:        }
                    759: }
                    760: 
                    761: /**
                    762:  * Update CPU cycle and count statistics for PC address.
                    763:  *
                    764:  * This gets called after instruction has executed and PC
                    765:  * has advanced to next instruction.
                    766:  */
                    767: void Profile_CpuUpdate(void)
                    768: {
                    769:        counters_t *counters = &(cpu_profile.all);
                    770:        Uint32 pc, prev_pc, idx, cycles, misses;
                    771:        cpu_profile_item_t *prev;
                    772: 
                    773:        prev_pc = cpu_profile.prev_pc;
                    774:        /* PC may have extra bits, they need to be masked away as
                    775:         * emulation itself does that too when PC value is used
                    776:         */
                    777:        cpu_profile.prev_pc = pc = M68000_GetPC() & 0xffffff;
                    778: 
                    779:        idx = address2index(prev_pc);
                    780:        assert(idx <= cpu_profile.size);
                    781:        prev = cpu_profile.data + idx;
                    782: 
                    783:        if (likely(prev->count < MAX_CPU_PROFILE_VALUE)) {
                    784:                prev->count++;
                    785:        }
                    786: 
                    787: #if USE_CYCLES_COUNTER
                    788:        /* Confusingly, with DSP enabled, cycle counter is for this instruction,
                    789:         * without DSP enabled, it's a monotonically increasing counter.
                    790:         */
                    791:        if (bDspEnabled) {
                    792:                cycles = Cycles_GetCounter(CYCLES_COUNTER_CPU);
                    793:        } else {
                    794:                Uint32 newcycles = Cycles_GetCounter(CYCLES_COUNTER_CPU);
                    795:                cycles = newcycles - cpu_profile.prev_cycles;
                    796:                cpu_profile.prev_cycles = newcycles;
                    797:        }
                    798: #else
                    799:        cycles = CurrentInstrCycles + nWaitStateCycles;
                    800: #endif
                    801:        /* cycles are based on 8Mhz clock, change them to correct one */
                    802:        cycles <<= nCpuFreqShift;
                    803: 
                    804:        if (likely(prev->cycles < MAX_CPU_PROFILE_VALUE - cycles)) {
                    805:                prev->cycles += cycles;
                    806:        } else {
                    807:                prev->cycles = MAX_CPU_PROFILE_VALUE;
                    808:        }
                    809: 
                    810: #if ENABLE_WINUAE_CPU
                    811:        misses = CpuInstruction.iCacheMisses;
                    812:        assert(misses < MAX_MISS);
                    813:        cpu_profile.miss_counts[misses]++;
                    814:        if (likely(prev->misses < MAX_CPU_PROFILE_VALUE - misses)) {
                    815:                prev->misses += misses;
                    816:        } else {
                    817:                prev->misses = MAX_CPU_PROFILE_VALUE;
                    818:        }
                    819: #else
                    820:        misses = 0;
                    821: #endif
                    822:        if (cpu_callinfo.sites) {
                    823:                collect_calls(prev_pc, counters);
                    824:        }
                    825:        /* counters are increased after caller info is processed,
                    826:         * otherwise cost for the instruction calling the callee
                    827:         * doesn't get accounted to caller (but callee).
                    828:         */
                    829:        counters->misses += misses;
                    830:        counters->cycles += cycles;
                    831:        counters->count++;
                    832: 
                    833: #if DEBUG
                    834:        if (unlikely(OpcodeFamily == 0)) {
                    835:                Uint32 nextpc;
                    836:                fputs("WARNING: instruction opcode family is zero (=i_ILLG) for instruction:\n", stderr);
                    837:                Disasm(stderr, prev_pc, &nextpc, 1);
                    838:        }
                    839:        /* catch too large (and negative) cycles for other than STOP instruction */
                    840:        if (unlikely(cycles > 512 && OpcodeFamily != i_STOP)) {
                    841:                Uint32 nextpc;
                    842:                fprintf(stderr, "WARNING: cycles %d > 512:\n", cycles);
                    843:                Disasm(stderr, prev_pc, &nextpc, 1);
                    844:        }
                    845:        if (unlikely(cycles == 0)) {
                    846:                Uint32 nextpc;
                    847:                fputs("WARNING: Zero cycles for an opcode:\n", stderr);
                    848:                Disasm(stderr, prev_pc, &nextpc, 1);
                    849:        }
                    850: #endif
                    851: }
                    852: 
                    853: 
                    854: /**
                    855:  * Helper for accounting CPU profile area item.
                    856:  */
                    857: static void update_area_item(profile_area_t *area, Uint32 addr, cpu_profile_item_t *item)
                    858: {
                    859:        Uint32 cycles = item->cycles;
                    860:        Uint32 count = item->count;
                    861: 
                    862:        if (!count) {
                    863:                return;
                    864:        }
                    865:        area->counters.count += count;
                    866:        area->counters.misses += item->misses;
                    867:        area->counters.cycles += cycles;
                    868: 
                    869:        if (cycles == MAX_CPU_PROFILE_VALUE) {
                    870:                area->overflow = true;
                    871:        }
                    872:        if (addr < area->lowest) {
                    873:                area->lowest = addr;
                    874:        }
                    875:        area->highest = addr;
                    876: 
                    877:        area->active++;
                    878: }
                    879: 
                    880: /**
                    881:  * Helper for collecting CPU profile area statistics.
                    882:  */
                    883: static Uint32 update_area(profile_area_t *area, Uint32 start, Uint32 end)
                    884: {
                    885:        cpu_profile_item_t *item;
                    886:        Uint32 addr;
                    887: 
                    888:        memset(area, 0, sizeof(profile_area_t));
                    889:        area->lowest = cpu_profile.size;
                    890: 
                    891:        item = &(cpu_profile.data[start]);
                    892:        for (addr = start; addr < end; addr++, item++) {
                    893:                update_area_item(area, addr, item);
                    894:        }
                    895:        return addr;
                    896: }
                    897: 
                    898: /**
                    899:  * Helper for initializing CPU profile area sorting indexes.
                    900:  */
                    901: static Uint32* index_area(profile_area_t *area, Uint32 *sort_arr)
                    902: {
                    903:        cpu_profile_item_t *item;
                    904:        Uint32 addr;
                    905: 
                    906:        item = &(cpu_profile.data[area->lowest]);
                    907:        for (addr = area->lowest; addr <= area->highest; addr++, item++) {
                    908:                if (item->count) {
                    909:                        *sort_arr++ = addr;
                    910:                }
                    911:        }
                    912:        return sort_arr;
                    913: }
                    914: 
                    915: /**
                    916:  * Stop and process the CPU profiling data; collect stats and
                    917:  * prepare for more optimal sorting.
                    918:  */
                    919: void Profile_CpuStop(void)
                    920: {
                    921:        Uint32 *sort_arr, next;
                    922:        int active;
                    923: 
                    924:        if (cpu_profile.processed || !cpu_profile.enabled) {
                    925:                return;
                    926:        }
                    927:        /* user didn't change RAM or TOS size in the meanwhile? */
                    928:        assert(cpu_profile.size == (STRamEnd + 0x20000 + TosSize) / 2);
                    929: 
                    930:        Profile_FinalizeCalls(&(cpu_callinfo), &(cpu_profile.all), Symbols_GetByCpuAddress);
                    931: 
                    932:        /* find lowest and highest addresses executed etc */
                    933:        next = update_area(&cpu_profile.ram, 0, STRamEnd/2);
                    934:        next = update_area(&cpu_profile.tos, next, (STRamEnd + TosSize)/2);
                    935:        next = update_area(&cpu_profile.rom, next, cpu_profile.size);
                    936:        assert(next == cpu_profile.size);
                    937: 
                    938: #if DEBUG
                    939:        if (skip_assert) {
                    940:                skip_assert = false;
                    941:        } else
                    942: #endif
                    943:        {
                    944:                assert(cpu_profile.all.misses == cpu_profile.ram.counters.misses + cpu_profile.tos.counters.misses + cpu_profile.rom.counters.misses);
                    945:                assert(cpu_profile.all.cycles == cpu_profile.ram.counters.cycles + cpu_profile.tos.counters.cycles + cpu_profile.rom.counters.cycles);
                    946:                assert(cpu_profile.all.count == cpu_profile.ram.counters.count + cpu_profile.tos.counters.count + cpu_profile.rom.counters.count);
                    947:        }
                    948: 
                    949:        /* allocate address array for sorting */
                    950:        active = cpu_profile.ram.active + cpu_profile.rom.active + cpu_profile.tos.active;
                    951:        sort_arr = calloc(active, sizeof(*sort_arr));
                    952: 
                    953:        if (!sort_arr) {
                    954:                perror("ERROR: allocating CPU profile address data");
                    955:                free(cpu_profile.data);
                    956:                cpu_profile.data = NULL;
                    957:                return;
                    958:        }
                    959:        printf("Allocated CPU profile address buffer (%d KB).\n",
                    960:               (int)sizeof(*sort_arr)*(active+512)/1024);
                    961:        cpu_profile.sort_arr = sort_arr;
                    962:        cpu_profile.active = active;
                    963: 
                    964:        /* and fill addresses for used instructions... */
                    965:        sort_arr = index_area(&cpu_profile.ram, sort_arr);
                    966:        sort_arr = index_area(&cpu_profile.tos, sort_arr);
                    967:        sort_arr = index_area(&cpu_profile.rom, sort_arr);
                    968:        assert(sort_arr == cpu_profile.sort_arr + cpu_profile.active);
                    969:        //printf("%d/%d/%d\n", area->active, sort_arr-cpu_profile.sort_arr, active);
                    970: 
                    971:        Profile_CpuShowStats();
                    972:        cpu_profile.processed = true;
                    973: }
                    974: 
                    975: /**
                    976:  * Get pointers to CPU profile enabling and disasm address variables
                    977:  * for updating them (in parser).
                    978:  */
                    979: void Profile_CpuGetPointers(bool **enabled, Uint32 **disasm_addr)
                    980: {
                    981:        *disasm_addr = &cpu_profile.disasm_addr;
                    982:        *enabled = &cpu_profile.enabled;
                    983: }
                    984: 
                    985: /**
                    986:  * Get callinfo & symbol search pointers for stack walking.
                    987:  */
                    988: void Profile_CpuGetCallinfo(callinfo_t **callinfo, const char* (**get_symbol)(Uint32))
                    989: {
                    990:        *callinfo = &(cpu_callinfo);
                    991:        *get_symbol = Symbols_GetByCpuAddress;
                    992: }

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.