Annotation of hatari/src/debug/profilecpu.c, revision 1.1

1.1     ! root        1: /*
        !             2:  * Hatari - profilecpu.c
        !             3:  * 
        !             4:  * Copyright (C) 2010-2013 by Eero Tamminen
        !             5:  *
        !             6:  * This file is distributed under the GNU General Public License, version 2
        !             7:  * or at your option any later version. Read the file gpl.txt for details.
        !             8:  *
        !             9:  * profilecpu.c - functions for profiling CPU and showing the results.
        !            10:  */
        !            11: const char Profilecpu_fileid[] = "Hatari profilecpu.c : " __DATE__ " " __TIME__;
        !            12: 
        !            13: #include <stdio.h>
        !            14: #include <inttypes.h>
        !            15: #include <assert.h>
        !            16: #include "main.h"
        !            17: #include "configuration.h"
        !            18: #include "clocks_timings.h"
        !            19: #include "debugInfo.h"
        !            20: #include "dsp.h"
        !            21: #include "m68000.h"
        !            22: #include "68kDisass.h"
        !            23: #include "profile.h"
        !            24: #include "profile_priv.h"
        !            25: #include "stMemory.h"
        !            26: #include "symbols.h"
        !            27: #include "tos.h"
        !            28: 
        !            29: /* if non-zero, output (more) warnings on suspicious:
        !            30:  * - cycle/instruction counts
        !            31:  * - PC switches
        !            32:  * And drop to debugger on invalid PC addresses.
        !            33:  */
        !            34: #define DEBUG 0
        !            35: #if DEBUG
        !            36: #include "debugui.h"
        !            37: static bool skip_assert;
        !            38: #endif
        !            39: 
        !            40: static callinfo_t cpu_callinfo;
        !            41: 
        !            42: /* This is relevant with WinUAE CPU core:
        !            43:  * - the default cycle exact variant needs this define to be non-zero
        !            44:  * - non-cycle exact and MMU variants need this define to be 0
        !            45:  *   for cycle counts to make any sense
        !            46:  */
        !            47: #define USE_CYCLES_COUNTER 1
        !            48: 
        !            49: #define MAX_CPU_PROFILE_VALUE 0xFFFFFFFF
        !            50: 
        !            51: typedef struct {
        !            52:        Uint32 count;   /* how many times this address instrcution is executed */
        !            53:        Uint32 cycles;  /* how many CPU cycles was taken at this address */
        !            54:        Uint32 misses;  /* how many CPU cache misses happened at this address */
        !            55: } cpu_profile_item_t;
        !            56: 
        !            57: #define MAX_MISS 4
        !            58: 
        !            59: static struct {
        !            60:        counters_t all;       /* total counts for all areas */
        !            61:        Uint32 miss_counts[MAX_MISS];  /* cache miss counts */
        !            62:        cpu_profile_item_t *data; /* profile data items */
        !            63:        Uint32 size;          /* number of allocated profile data items */
        !            64:        profile_area_t ram;   /* normal RAM stats */
        !            65:        profile_area_t rom;   /* cartridge ROM stats */
        !            66:        profile_area_t tos;   /* ROM TOS stats */
        !            67:        int active;           /* number of active data items in all areas */
        !            68:        Uint32 *sort_arr;     /* data indexes used for sorting */
        !            69:        Uint32 prev_cycles;   /* previous instruction cycles counter */
        !            70:        Uint32 prev_pc;       /* previous instruction address */
        !            71:        int prev_family;      /* previous instruction opcode family */
        !            72:        Uint32 disasm_addr;   /* 'addresses' command start address */
        !            73:        bool processed;       /* true when data is already processed */
        !            74:        bool enabled;         /* true when profiling enabled */
        !            75: } cpu_profile;
        !            76: 
        !            77: /* special hack for EmuTOS */
        !            78: static Uint32 etos_switcher;
        !            79: 
        !            80: 
        !            81: /* ------------------ CPU profile address mapping ----------------- */
        !            82: 
        !            83: /**
        !            84:  * convert Atari memory address to sorting array profile data index.
        !            85:  */
        !            86: static inline Uint32 address2index(Uint32 pc)
        !            87: {
        !            88:        if (unlikely(pc & 1)) {
        !            89:                fprintf(stderr, "WARNING: odd CPU profile instruction address 0x%x!\n", pc);
        !            90: #if DEBUG
        !            91:                skip_assert = true;
        !            92:                DebugUI(REASON_CPU_EXCEPTION);
        !            93: #endif
        !            94:        }
        !            95:        if (pc >= TosAddress && pc < TosAddress + TosSize) {
        !            96:                /* TOS, put it after RAM data */
        !            97:                pc = pc - TosAddress + STRamEnd;
        !            98: 
        !            99:        } else if (pc >= 0xFA0000 && pc < 0xFC0000) {
        !           100:                /* ROM, put it after RAM & TOS data */
        !           101:                pc = pc - 0xFA0000 + STRamEnd + TosSize;
        !           102: 
        !           103:        } else {
        !           104:                /* if in RAM, use as-is */
        !           105:                if (unlikely(pc >= STRamEnd)) {
        !           106:                        fprintf(stderr, "WARNING: 'invalid' CPU PC profile instruction address 0x%x!\n", pc);
        !           107:                        /* extra entry at end is reserved for invalid PC values */
        !           108:                        pc = STRamEnd + TosSize + 0x20000;
        !           109: #if DEBUG
        !           110:                        skip_assert = true;
        !           111:                        DebugUI(REASON_CPU_EXCEPTION);
        !           112: #endif
        !           113:                }
        !           114:        }
        !           115:        /* CPU instructions are at even addresses, save space by halving */
        !           116:        return (pc >> 1);
        !           117: }
        !           118: 
        !           119: /**
        !           120:  * convert sorting array profile data index to Atari memory address.
        !           121:  */
        !           122: static Uint32 index2address(Uint32 idx)
        !           123: {
        !           124:        idx <<= 1;
        !           125:        /* RAM */
        !           126:        if (idx < STRamEnd) {
        !           127:                return idx;
        !           128:        }
        !           129:        /* TOS */
        !           130:        idx -= STRamEnd;
        !           131:        if (idx < TosSize) {
        !           132:                return idx + TosAddress;
        !           133:        }
        !           134:        /* ROM */
        !           135:        return idx - TosSize + 0xFA0000;
        !           136: }
        !           137: 
        !           138: /* ------------------ CPU profile results ----------------- */
        !           139: 
        !           140: /**
        !           141:  * Get CPU cycles, count and count percentage for given address.
        !           142:  * Return true if data was available and non-zero, false otherwise.
        !           143:  */
        !           144: bool Profile_CpuAddressData(Uint32 addr, float *percentage, Uint32 *count, Uint32 *cycles, Uint32 *misses)
        !           145: {
        !           146:        Uint32 idx;
        !           147:        if (!cpu_profile.data) {
        !           148:                return false;
        !           149:        }
        !           150:        idx = address2index(addr);
        !           151:        *misses = cpu_profile.data[idx].misses;
        !           152:        *cycles = cpu_profile.data[idx].cycles;
        !           153:        *count = cpu_profile.data[idx].count;
        !           154:        if (cpu_profile.all.count) {
        !           155:                *percentage = 100.0*(*count)/cpu_profile.all.count;
        !           156:        } else {
        !           157:                *percentage = 0.0;
        !           158:        }
        !           159:        return (*count > 0);
        !           160: }
        !           161: 
        !           162: /**
        !           163:  * Helper to show statistics for specified CPU profile area.
        !           164:  */
        !           165: static void show_cpu_area_stats(profile_area_t *area)
        !           166: {
        !           167:        if (!area->active) {
        !           168:                fprintf(stderr, "- no activity\n");
        !           169:                return;
        !           170:        }
        !           171:        fprintf(stderr, "- active address range:\n  0x%06x-0x%06x\n",
        !           172:                index2address(area->lowest),
        !           173:                index2address(area->highest));
        !           174:        fprintf(stderr, "- active instruction addresses:\n  %d (%.2f%% of all)\n",
        !           175:                area->active,
        !           176:                100.0 * area->active / cpu_profile.active);
        !           177:        fprintf(stderr, "- executed instructions:\n  %"PRIu64" (%.2f%% of all)\n",
        !           178:                area->counters.count,
        !           179:                100.0 * area->counters.count / cpu_profile.all.count);
        !           180: #if ENABLE_WINUAE_CPU
        !           181:        if (cpu_profile.all.misses) {   /* CPU cache in use? */
        !           182:                fprintf(stderr, "- instruction cache misses:\n  %"PRIu64" (%.2f%% of all)\n",
        !           183:                        area->counters.misses,
        !           184:                        100.0 * area->counters.misses / cpu_profile.all.misses);
        !           185:        }
        !           186: #endif
        !           187:        fprintf(stderr, "- used cycles:\n  %"PRIu64" (%.2f%% of all)\n  = %.5fs\n",
        !           188:                area->counters.cycles,
        !           189:                100.0 * area->counters.cycles / cpu_profile.all.cycles,
        !           190:                (double)area->counters.cycles / MachineClocks.CPU_Freq);
        !           191:        if (area->overflow) {
        !           192:                fprintf(stderr, "  *** COUNTER OVERFLOW! ***\n");
        !           193:        }
        !           194: }
        !           195: 
        !           196: 
        !           197: /**
        !           198:  * show CPU area (RAM, ROM, TOS) specific statistics.
        !           199:  */
        !           200: void Profile_CpuShowStats(void)
        !           201: {
        !           202:        fprintf(stderr, "Normal RAM (0-0x%X):\n", STRamEnd);
        !           203:        show_cpu_area_stats(&cpu_profile.ram);
        !           204: 
        !           205:        fprintf(stderr, "ROM TOS (0x%X-0x%X):\n", TosAddress, TosAddress + TosSize);
        !           206:        show_cpu_area_stats(&cpu_profile.tos);
        !           207: 
        !           208:        fprintf(stderr, "Cartridge ROM (0xFA0000-0xFC0000):\n");
        !           209:        show_cpu_area_stats(&cpu_profile.rom);
        !           210: 
        !           211:        fprintf(stderr, "\n= %.5fs\n",
        !           212:                (double)cpu_profile.all.cycles / MachineClocks.CPU_Freq);
        !           213: 
        !           214: #if ENABLE_WINUAE_CPU
        !           215:        if (cpu_profile.all.misses) {   /* CPU cache in use? */
        !           216:                int i;
        !           217:                fprintf(stderr, "\nCache misses per instruction, number of occurrences:\n");
        !           218:                for (i = 0; i < MAX_MISS; i++) {
        !           219:                        fprintf(stderr, "- %d: %d\n", i, cpu_profile.miss_counts[i]);
        !           220:                }
        !           221:        }
        !           222: #endif
        !           223: }
        !           224: 
        !           225: /**
        !           226:  * Show CPU instructions which execution was profiled, in the address order,
        !           227:  * starting from the given address.  Return next disassembly address.
        !           228:  */
        !           229: Uint32 Profile_CpuShowAddresses(Uint32 lower, Uint32 upper, FILE *out)
        !           230: {
        !           231:        int oldcols[DISASM_COLUMNS], newcols[DISASM_COLUMNS];
        !           232:        int show, shown, active;
        !           233:        const char *symbol;
        !           234:        cpu_profile_item_t *data;
        !           235:        Uint32 idx, end, size;
        !           236:        uaecptr nextpc, addr;
        !           237: 
        !           238:        data = cpu_profile.data;
        !           239:        if (!data) {
        !           240:                fprintf(stderr, "ERROR: no CPU profiling data available!\n");
        !           241:                return 0;
        !           242:        }
        !           243: 
        !           244:        size = cpu_profile.size;
        !           245:        active = cpu_profile.active;
        !           246:        if (upper) {
        !           247:                end = address2index(upper);
        !           248:                show = active;
        !           249:                if (end > size) {
        !           250:                        end = size;
        !           251:                }
        !           252:        } else {
        !           253:                end = size;
        !           254:                show = ConfigureParams.Debugger.nDisasmLines;
        !           255:                if (!show || show > active) {
        !           256:                        show = active;
        !           257:                }
        !           258:        }
        !           259: 
        !           260:        /* get/change columns */
        !           261:        Disasm_GetColumns(oldcols);
        !           262:        Disasm_DisableColumn(DISASM_COLUMN_HEXDUMP, oldcols, newcols);
        !           263:        Disasm_SetColumns(newcols);
        !           264: 
        !           265:        fputs("# disassembly with profile data: <instructions percentage>% (<sum of instructions>, <sum of cycles>, <sum of i-cache misses>)\n", out);
        !           266: 
        !           267:        nextpc = 0;
        !           268:        idx = address2index(lower);
        !           269:        for (shown = 0; shown < show && idx < end; idx++) {
        !           270:                if (!data[idx].count) {
        !           271:                        continue;
        !           272:                }
        !           273:                addr = index2address(idx);
        !           274:                if (addr != nextpc && nextpc) {
        !           275:                        fprintf(out, "[...]\n");
        !           276:                }
        !           277:                symbol = Symbols_GetByCpuAddress(addr);
        !           278:                if (symbol) {
        !           279:                        fprintf(out, "%s:\n", symbol);
        !           280:                }
        !           281:                /* NOTE: column setup works only with 68kDisass disasm engine! */
        !           282:                Disasm(out, addr, &nextpc, 1);
        !           283:                shown++;
        !           284:        }
        !           285:        printf("Disassembled %d (of active %d) CPU addresses.\n", shown, active);
        !           286: 
        !           287:        /* restore disassembly columns */
        !           288:        Disasm_SetColumns(oldcols);
        !           289:        return nextpc;
        !           290: }
        !           291: 
        !           292: /**
        !           293:  * remove all disassembly columns except instruction ones.
        !           294:  * data needed to restore columns is stored to "oldcols"
        !           295:  */
        !           296: static void leave_instruction_column(int *oldcols)
        !           297: {
        !           298:        int i, newcols[DISASM_COLUMNS];
        !           299: 
        !           300:        Disasm_GetColumns(oldcols);
        !           301:        for (i = 0; i < DISASM_COLUMNS; i++) {
        !           302:                if (i == DISASM_COLUMN_OPCODE || i == DISASM_COLUMN_OPERAND) {
        !           303:                        continue;
        !           304:                }
        !           305:                Disasm_DisableColumn(i, oldcols, newcols);
        !           306:                oldcols = newcols;
        !           307:        }
        !           308:        Disasm_SetColumns(newcols);
        !           309: }
        !           310: 
        !           311: #if ENABLE_WINUAE_CPU
        !           312: /**
        !           313:  * compare function for qsort() to sort CPU profile data by instruction cache misses.
        !           314:  */
        !           315: static int cmp_cpu_misses(const void *p1, const void *p2)
        !           316: {
        !           317:        Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].misses;
        !           318:        Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].misses;
        !           319:        if (count1 > count2) {
        !           320:                return -1;
        !           321:        }
        !           322:        if (count1 < count2) {
        !           323:                return 1;
        !           324:        }
        !           325:        return 0;
        !           326: }
        !           327: 
        !           328: /**
        !           329:  * Sort CPU profile data addresses by instruction cache misses and show the results.
        !           330:  */
        !           331: void Profile_CpuShowMisses(int show)
        !           332: {
        !           333:        int active;
        !           334:        int oldcols[DISASM_COLUMNS];
        !           335:        Uint32 *sort_arr, *end, addr, nextpc;
        !           336:        cpu_profile_item_t *data = cpu_profile.data;
        !           337:        float percentage;
        !           338:        Uint32 count;
        !           339: 
        !           340:        if (!cpu_profile.all.misses) {
        !           341:                fprintf(stderr, "No CPU cache miss information available.\n");
        !           342:                return;
        !           343:        }
        !           344: 
        !           345:        active = cpu_profile.active;
        !           346:        sort_arr = cpu_profile.sort_arr;
        !           347:        qsort(sort_arr, active, sizeof(*sort_arr), cmp_cpu_misses);
        !           348: 
        !           349:        leave_instruction_column(oldcols);
        !           350: 
        !           351:        printf("addr:\t\tmisses:\n");
        !           352:        show = (show < active ? show : active);
        !           353:        for (end = sort_arr + show; sort_arr < end; sort_arr++) {
        !           354:                addr = index2address(*sort_arr);
        !           355:                count = data[*sort_arr].misses;
        !           356:                percentage = 100.0*count/cpu_profile.all.misses;
        !           357:                printf("0x%06x\t%5.2f%%\t%d%s\t", addr, percentage, count,
        !           358:                       count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
        !           359:                Disasm(stdout, addr, &nextpc, 1);
        !           360:        }
        !           361:        printf("%d CPU addresses listed.\n", show);
        !           362: 
        !           363:        Disasm_SetColumns(oldcols);
        !           364: }
        !           365: #else
        !           366: void Profile_CpuShowMisses(int show) {
        !           367:        fprintf(stderr, "Cache misses are recorded only with WinUAE CPU.\n");
        !           368: }
        !           369: #endif
        !           370: 
        !           371: 
        !           372: /**
        !           373:  * compare function for qsort() to sort CPU profile data by cycles counts.
        !           374:  */
        !           375: static int cmp_cpu_cycles(const void *p1, const void *p2)
        !           376: {
        !           377:        Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].cycles;
        !           378:        Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].cycles;
        !           379:        if (count1 > count2) {
        !           380:                return -1;
        !           381:        }
        !           382:        if (count1 < count2) {
        !           383:                return 1;
        !           384:        }
        !           385:        return 0;
        !           386: }
        !           387: 
        !           388: /**
        !           389:  * Sort CPU profile data addresses by cycle counts and show the results.
        !           390:  */
        !           391: void Profile_CpuShowCycles(int show)
        !           392: {
        !           393:        int active;
        !           394:        int oldcols[DISASM_COLUMNS];
        !           395:        Uint32 *sort_arr, *end, addr, nextpc;
        !           396:        cpu_profile_item_t *data = cpu_profile.data;
        !           397:        float percentage;
        !           398:        Uint32 count;
        !           399: 
        !           400:        if (!data) {
        !           401:                fprintf(stderr, "ERROR: no CPU profiling data available!\n");
        !           402:                return;
        !           403:        }
        !           404: 
        !           405:        active = cpu_profile.active;
        !           406:        sort_arr = cpu_profile.sort_arr;
        !           407:        qsort(sort_arr, active, sizeof(*sort_arr), cmp_cpu_cycles);
        !           408: 
        !           409:        leave_instruction_column(oldcols);
        !           410: 
        !           411:        printf("addr:\t\tcycles:\n");
        !           412:        show = (show < active ? show : active);
        !           413:        for (end = sort_arr + show; sort_arr < end; sort_arr++) {
        !           414:                addr = index2address(*sort_arr);
        !           415:                count = data[*sort_arr].cycles;
        !           416:                percentage = 100.0*count/cpu_profile.all.cycles;
        !           417:                printf("0x%06x\t%5.2f%%\t%d%s\t", addr, percentage, count,
        !           418:                       count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
        !           419:                Disasm(stdout, addr, &nextpc, 1);
        !           420:        }
        !           421:        printf("%d CPU addresses listed.\n", show);
        !           422: 
        !           423:        Disasm_SetColumns(oldcols);
        !           424: }
        !           425: 
        !           426: /**
        !           427:  * compare function for qsort() to sort CPU profile data by descending
        !           428:  * address access counts.
        !           429:  */
        !           430: static int cmp_cpu_count(const void *p1, const void *p2)
        !           431: {
        !           432:        Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].count;
        !           433:        Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].count;
        !           434:        if (count1 > count2) {
        !           435:                return -1;
        !           436:        }
        !           437:        if (count1 < count2) {
        !           438:                return 1;
        !           439:        }
        !           440:        return 0;
        !           441: }
        !           442: 
        !           443: /**
        !           444:  * Sort CPU profile data addresses by call counts and show the results.
        !           445:  * If symbols are requested and symbols are loaded, show (only) addresses
        !           446:  * matching a symbol.
        !           447:  */
        !           448: void Profile_CpuShowCounts(int show, bool only_symbols)
        !           449: {
        !           450:        cpu_profile_item_t *data = cpu_profile.data;
        !           451:        int symbols, matched, active;
        !           452:        int oldcols[DISASM_COLUMNS];
        !           453:        Uint32 *sort_arr, *end, addr, nextpc;
        !           454:        const char *name;
        !           455:        float percentage;
        !           456:        Uint32 count;
        !           457: 
        !           458:        if (!data) {
        !           459:                fprintf(stderr, "ERROR: no CPU profiling data available!\n");
        !           460:                return;
        !           461:        }
        !           462:        active = cpu_profile.active;
        !           463:        show = (show < active ? show : active);
        !           464: 
        !           465:        sort_arr = cpu_profile.sort_arr;
        !           466:        qsort(sort_arr, active, sizeof(*sort_arr), cmp_cpu_count);
        !           467: 
        !           468:        if (!only_symbols) {
        !           469:                leave_instruction_column(oldcols);
        !           470:                printf("addr:\t\tcount:\n");
        !           471:                for (end = sort_arr + show; sort_arr < end; sort_arr++) {
        !           472:                        addr = index2address(*sort_arr);
        !           473:                        count = data[*sort_arr].count;
        !           474:                        percentage = 100.0*count/cpu_profile.all.count;
        !           475:                        printf("0x%06x\t%5.2f%%\t%d%s\t",
        !           476:                               addr, percentage, count,
        !           477:                               count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
        !           478:                        Disasm(stdout, addr, &nextpc, 1);
        !           479:                }
        !           480:                printf("%d CPU addresses listed.\n", show);
        !           481:                Disasm_SetColumns(oldcols);
        !           482:                return;
        !           483:        }
        !           484: 
        !           485:        symbols = Symbols_CpuCount();
        !           486:        if (!symbols) {
        !           487:                fprintf(stderr, "ERROR: no CPU symbols loaded!\n");
        !           488:                return;
        !           489:        }
        !           490:        matched = 0;    
        !           491: 
        !           492:        leave_instruction_column(oldcols);
        !           493: 
        !           494:        printf("addr:\t\tcount:\t\tsymbol:\n");
        !           495:        for (end = sort_arr + active; sort_arr < end; sort_arr++) {
        !           496: 
        !           497:                addr = index2address(*sort_arr);
        !           498:                name = Symbols_GetByCpuAddress(addr);
        !           499:                if (!name) {
        !           500:                        continue;
        !           501:                }
        !           502:                count = data[*sort_arr].count;
        !           503:                percentage = 100.0*count/cpu_profile.all.count;
        !           504:                printf("0x%06x\t%5.2f%%\t%d\t%s%s\t",
        !           505:                       addr, percentage, count, name,
        !           506:                       count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
        !           507:                Disasm(stdout, addr, &nextpc, 1);
        !           508: 
        !           509:                matched++;
        !           510:                if (matched >= show || matched >= symbols) {
        !           511:                        break;
        !           512:                }
        !           513:        }
        !           514:        printf("%d CPU symbols listed.\n", matched);
        !           515: 
        !           516:        Disasm_SetColumns(oldcols);
        !           517: }
        !           518: 
        !           519: 
        !           520: static const char * addr2name(Uint32 addr, Uint64 *total)
        !           521: {
        !           522:        Uint32 idx = address2index(addr);
        !           523:        *total = cpu_profile.data[idx].count;
        !           524:        return Symbols_GetByCpuAddress(addr);
        !           525: }
        !           526: 
        !           527: /**
        !           528:  * Output CPU callers info to given file.
        !           529:  */
        !           530: void Profile_CpuShowCallers(FILE *fp)
        !           531: {
        !           532:        Profile_ShowCallers(fp, cpu_callinfo.sites, cpu_callinfo.site, addr2name);
        !           533: }
        !           534: 
        !           535: /**
        !           536:  * Save CPU profile information to given file.
        !           537:  */
        !           538: void Profile_CpuSave(FILE *out)
        !           539: {
        !           540:        Uint32 text;
        !           541:        fputs("Field names:\tExecuted instructions, Used cycles, Instruction cache misses\n", out);
        !           542:        /* (Python) pegexp that matches address and all describled fields from disassembly:
        !           543:         * $<hex>  :  <ASM>  <percentage>% (<count>, <cycles>, <misses>)
        !           544:         * $e5af38 :   rts           0.00% (12, 0, 12)
        !           545:         */
        !           546:        fputs("Field regexp:\t^\\$([0-9a-f]+) :.*% \\((.*)\\)$\n", out);
        !           547:        /* some information for interpreting the addresses */
        !           548:        fprintf(out, "ROM_TOS:\t0x%06x-0x%06x\n", TosAddress, TosAddress + TosSize);
        !           549:        text = DebugInfo_GetTEXT();
        !           550:        if (text < TosAddress) {
        !           551:                fprintf(out, "PROGRAM_TEXT:\t0x%06x-0x%06x\n", text, DebugInfo_GetTEXTEnd());
        !           552:        }
        !           553:        fprintf(out, "CARTRIDGE:\t0xfa0000-0xfc0000\n");
        !           554:        Profile_CpuShowAddresses(0, 0xFC0000-2, out);
        !           555:        Profile_CpuShowCallers(out);
        !           556: }
        !           557: 
        !           558: /* ------------------ CPU profile control ----------------- */
        !           559: 
        !           560: /**
        !           561:  * Initialize CPU profiling when necessary.  Return true if profiling.
        !           562:  */
        !           563: bool Profile_CpuStart(void)
        !           564: {
        !           565:        int size;
        !           566: 
        !           567:        Profile_FreeCallinfo(&(cpu_callinfo));
        !           568:        if (cpu_profile.sort_arr) {
        !           569:                /* remove previous results */
        !           570:                free(cpu_profile.sort_arr);
        !           571:                free(cpu_profile.data);
        !           572:                cpu_profile.sort_arr = NULL;
        !           573:                cpu_profile.data = NULL;
        !           574:                printf("Freed previous CPU profile buffers.\n");
        !           575:        }
        !           576:        if (!cpu_profile.enabled) {
        !           577:                return false;
        !           578:        }
        !           579:        /* zero everything */
        !           580:        memset(&cpu_profile, 0, sizeof(cpu_profile));
        !           581: 
        !           582:        /* Shouldn't change within same debug session */
        !           583:        size = (STRamEnd + 0x20000 + TosSize) / 2;
        !           584: 
        !           585:        /* Add one entry for catching invalid PC values */
        !           586:        cpu_profile.data = calloc(size + 1, sizeof(*cpu_profile.data));
        !           587:        if (!cpu_profile.data) {
        !           588:                perror("ERROR, new CPU profile buffer alloc failed");
        !           589:                return false;
        !           590:        }
        !           591:        printf("Allocated CPU profile buffer (%d MB).\n",
        !           592:               (int)sizeof(*cpu_profile.data)*size/(1024*1024));
        !           593:        cpu_profile.size = size;
        !           594: 
        !           595:        Profile_AllocCallinfo(&(cpu_callinfo), Symbols_CpuCount(), "CPU");
        !           596: 
        !           597:        /* special hack for EmuTOS */
        !           598:        etos_switcher = PC_UNDEFINED;
        !           599:        if (cpu_callinfo.sites && bIsEmuTOS &&
        !           600:            (!Symbols_GetCpuAddress(SYMTYPE_TEXT, "_switchto", &etos_switcher) || etos_switcher < TosAddress)) {
        !           601:                etos_switcher = PC_UNDEFINED;
        !           602:        }
        !           603: 
        !           604:        cpu_profile.prev_cycles = Cycles_GetCounter(CYCLES_COUNTER_CPU);
        !           605:        cpu_profile.prev_family = OpcodeFamily;
        !           606:        cpu_profile.prev_pc = M68000_GetPC() & 0xffffff;
        !           607: 
        !           608:        cpu_profile.disasm_addr = 0;
        !           609:        cpu_profile.processed = false;
        !           610:        cpu_profile.enabled = true;
        !           611:        return cpu_profile.enabled;
        !           612: }
        !           613: 
        !           614: /**
        !           615:  * return true if pc could be next instruction for previous pc
        !           616:  */
        !           617: static bool is_prev_instr(Uint32 prev_pc, Uint32 pc)
        !           618: {
        !           619:        /* just moved to next instruction (1-2 words)? */
        !           620:        if (prev_pc < pc && (pc - prev_pc) <= 10) {
        !           621:                return true;
        !           622:        }
        !           623:        return false;
        !           624: }
        !           625: 
        !           626: /**
        !           627:  * return caller instruction type classification
        !           628:  */
        !           629: static calltype_t cpu_opcode_type(int family, Uint32 prev_pc, Uint32 pc)
        !           630: {
        !           631:        switch (family) {
        !           632: 
        !           633:        case i_JSR:
        !           634:        case i_BSR:
        !           635:                return CALL_SUBROUTINE;
        !           636: 
        !           637:        case i_RTS:
        !           638:        case i_RTR:
        !           639:        case i_RTD:
        !           640:                return CALL_SUBRETURN;
        !           641: 
        !           642:        case i_JMP:     /* often used also for "inlined" function calls... */
        !           643:        case i_Bcc:     /* both BRA & BCC */
        !           644:        case i_FBcc:
        !           645:        case i_DBcc:
        !           646:        case i_FDBcc:
        !           647:                return CALL_BRANCH;
        !           648: 
        !           649:        case i_TRAP:
        !           650:        case i_TRAPV:
        !           651:        case i_TRAPcc:
        !           652:        case i_FTRAPcc:
        !           653:        case i_STOP:
        !           654:        case i_ILLG:
        !           655:        case i_CHK:
        !           656:        case i_CHK2:
        !           657:        case i_BKPT:
        !           658:                return CALL_EXCEPTION;
        !           659: 
        !           660:        case i_RTE:
        !           661:                return CALL_EXCRETURN;
        !           662:        }
        !           663:        /* just moved to next instruction? */
        !           664:        if (is_prev_instr(prev_pc, pc)) {
        !           665:                return CALL_NEXT;
        !           666:        }
        !           667:        return CALL_UNKNOWN;
        !           668: }
        !           669: 
        !           670: /**
        !           671:  * If call tracking is enabled (there are symbols), collect
        !           672:  * information about subroutine and other calls, and their costs.
        !           673:  * 
        !           674:  * Like with profile data, caller info checks need to be for previous
        !           675:  * instruction, that's why "pc" argument for this function actually
        !           676:  * needs to be previous PC.
        !           677:  */
        !           678: static void collect_calls(Uint32 pc, counters_t *counters)
        !           679: {
        !           680:        calltype_t flag;
        !           681:        int idx, family;
        !           682:        Uint32 prev_pc, caller_pc;
        !           683: 
        !           684:        family = cpu_profile.prev_family;
        !           685:        cpu_profile.prev_family = OpcodeFamily;
        !           686: 
        !           687:        prev_pc = cpu_callinfo.prev_pc;
        !           688:        cpu_callinfo.prev_pc = pc;
        !           689:        caller_pc = PC_UNDEFINED;
        !           690: 
        !           691:        /* address is return address for last subroutine call? */
        !           692:        if (unlikely(pc == cpu_callinfo.return_pc) && likely(cpu_callinfo.depth)) {
        !           693: 
        !           694:                flag = cpu_opcode_type(family, prev_pc, pc);
        !           695:                /* previous address can be exception return (RTE) if exception
        !           696:                 * occurred right after returning from subroutine call (RTS)
        !           697:                 */
        !           698:                if (likely(flag == CALL_SUBRETURN || flag == CALL_EXCRETURN)) {
        !           699:                        caller_pc = Profile_CallEnd(&cpu_callinfo, counters);
        !           700:                } else {
        !           701: #if DEBUG
        !           702:                        /* although at return address, it didn't return yet,
        !           703:                         * e.g. because there was a jsr or jump to return address
        !           704:                         */
        !           705:                        Uint32 nextpc;
        !           706:                        fprintf(stderr, "WARNING: subroutine call returned 0x%x -> 0x%x, not though RTS!\n", prev_pc, pc);
        !           707:                        Disasm(stderr, prev_pc, &nextpc, 1);
        !           708: #endif
        !           709:                }
        !           710:                /* next address might be another function, so need to fall through */
        !           711:        }
        !           712: 
        !           713:        /* address is one which we're tracking? */
        !           714:        idx = Symbols_GetCpuAddressIndex(pc);
        !           715:        if (unlikely(idx >= 0)) {
        !           716: 
        !           717:                flag = cpu_opcode_type(family, prev_pc, pc);
        !           718:                if (flag == CALL_SUBROUTINE) {
        !           719:                        /* special HACK for for EmuTOS AES switcher which
        !           720:                         * changes stack content to remove itself from call
        !           721:                         * stack and uses RTS for subroutine *calls*, not
        !           722:                         * for returning from them.
        !           723:                         *
        !           724:                         * It wouldn't be reliable to detect calls from it,
        !           725:                         * so I'm making call *to* it show up as branch, to
        !           726:                         * keep callstack depth correct.
        !           727:                         */
        !           728:                        if (unlikely(pc == etos_switcher)) {
        !           729:                                flag = CALL_BRANCH;
        !           730:                        } else if (unlikely(prev_pc == PC_UNDEFINED)) {
        !           731:                                /* if first profiled instruction
        !           732:                                 * is subroutine call, it doesn't have
        !           733:                                 * valid prev_pc value stored
        !           734:                                 */
        !           735:                                cpu_callinfo.return_pc = PC_UNDEFINED;
        !           736:                                fprintf(stderr, "WARNING: previous PC from callinfo for 0x%d is undefined!\n", pc);
        !           737: #if DEBUG
        !           738:                                skip_assert = true;
        !           739:                                DebugUI(REASON_CPU_EXCEPTION);
        !           740: #endif
        !           741:                        } else {
        !           742:                                /* slow! */
        !           743:                                cpu_callinfo.return_pc = Disasm_GetNextPC(prev_pc);
        !           744:                        }
        !           745:                } else if (caller_pc != PC_UNDEFINED) {
        !           746:                        /* returned from function to first instrction of another symbol:
        !           747:                         *      0xf384  jsr some_function
        !           748:                         *      other_symbol:
        !           749:                         *      0f3x8a  some_instruction
        !           750:                         * -> change return instruction address to
        !           751:                         *    address of what did the returned call.
        !           752:                         */
        !           753:                        prev_pc = caller_pc;
        !           754:                        assert(is_prev_instr(prev_pc, pc));
        !           755:                        flag = CALL_NEXT;
        !           756:                }
        !           757:                Profile_CallStart(idx, &cpu_callinfo, prev_pc, flag, pc, counters);
        !           758:        }
        !           759: }
        !           760: 
        !           761: /**
        !           762:  * Update CPU cycle and count statistics for PC address.
        !           763:  *
        !           764:  * This gets called after instruction has executed and PC
        !           765:  * has advanced to next instruction.
        !           766:  */
        !           767: void Profile_CpuUpdate(void)
        !           768: {
        !           769:        counters_t *counters = &(cpu_profile.all);
        !           770:        Uint32 pc, prev_pc, idx, cycles, misses;
        !           771:        cpu_profile_item_t *prev;
        !           772: 
        !           773:        prev_pc = cpu_profile.prev_pc;
        !           774:        /* PC may have extra bits, they need to be masked away as
        !           775:         * emulation itself does that too when PC value is used
        !           776:         */
        !           777:        cpu_profile.prev_pc = pc = M68000_GetPC() & 0xffffff;
        !           778: 
        !           779:        idx = address2index(prev_pc);
        !           780:        assert(idx <= cpu_profile.size);
        !           781:        prev = cpu_profile.data + idx;
        !           782: 
        !           783:        if (likely(prev->count < MAX_CPU_PROFILE_VALUE)) {
        !           784:                prev->count++;
        !           785:        }
        !           786: 
        !           787: #if USE_CYCLES_COUNTER
        !           788:        /* Confusingly, with DSP enabled, cycle counter is for this instruction,
        !           789:         * without DSP enabled, it's a monotonically increasing counter.
        !           790:         */
        !           791:        if (bDspEnabled) {
        !           792:                cycles = Cycles_GetCounter(CYCLES_COUNTER_CPU);
        !           793:        } else {
        !           794:                Uint32 newcycles = Cycles_GetCounter(CYCLES_COUNTER_CPU);
        !           795:                cycles = newcycles - cpu_profile.prev_cycles;
        !           796:                cpu_profile.prev_cycles = newcycles;
        !           797:        }
        !           798: #else
        !           799:        cycles = CurrentInstrCycles + nWaitStateCycles;
        !           800: #endif
        !           801:        /* cycles are based on 8Mhz clock, change them to correct one */
        !           802:        cycles <<= nCpuFreqShift;
        !           803: 
        !           804:        if (likely(prev->cycles < MAX_CPU_PROFILE_VALUE - cycles)) {
        !           805:                prev->cycles += cycles;
        !           806:        } else {
        !           807:                prev->cycles = MAX_CPU_PROFILE_VALUE;
        !           808:        }
        !           809: 
        !           810: #if ENABLE_WINUAE_CPU
        !           811:        misses = CpuInstruction.iCacheMisses;
        !           812:        assert(misses < MAX_MISS);
        !           813:        cpu_profile.miss_counts[misses]++;
        !           814:        if (likely(prev->misses < MAX_CPU_PROFILE_VALUE - misses)) {
        !           815:                prev->misses += misses;
        !           816:        } else {
        !           817:                prev->misses = MAX_CPU_PROFILE_VALUE;
        !           818:        }
        !           819: #else
        !           820:        misses = 0;
        !           821: #endif
        !           822:        if (cpu_callinfo.sites) {
        !           823:                collect_calls(prev_pc, counters);
        !           824:        }
        !           825:        /* counters are increased after caller info is processed,
        !           826:         * otherwise cost for the instruction calling the callee
        !           827:         * doesn't get accounted to caller (but callee).
        !           828:         */
        !           829:        counters->misses += misses;
        !           830:        counters->cycles += cycles;
        !           831:        counters->count++;
        !           832: 
        !           833: #if DEBUG
        !           834:        if (unlikely(OpcodeFamily == 0)) {
        !           835:                Uint32 nextpc;
        !           836:                fputs("WARNING: instruction opcode family is zero (=i_ILLG) for instruction:\n", stderr);
        !           837:                Disasm(stderr, prev_pc, &nextpc, 1);
        !           838:        }
        !           839:        /* catch too large (and negative) cycles for other than STOP instruction */
        !           840:        if (unlikely(cycles > 512 && OpcodeFamily != i_STOP)) {
        !           841:                Uint32 nextpc;
        !           842:                fprintf(stderr, "WARNING: cycles %d > 512:\n", cycles);
        !           843:                Disasm(stderr, prev_pc, &nextpc, 1);
        !           844:        }
        !           845:        if (unlikely(cycles == 0)) {
        !           846:                Uint32 nextpc;
        !           847:                fputs("WARNING: Zero cycles for an opcode:\n", stderr);
        !           848:                Disasm(stderr, prev_pc, &nextpc, 1);
        !           849:        }
        !           850: #endif
        !           851: }
        !           852: 
        !           853: 
        !           854: /**
        !           855:  * Helper for accounting CPU profile area item.
        !           856:  */
        !           857: static void update_area_item(profile_area_t *area, Uint32 addr, cpu_profile_item_t *item)
        !           858: {
        !           859:        Uint32 cycles = item->cycles;
        !           860:        Uint32 count = item->count;
        !           861: 
        !           862:        if (!count) {
        !           863:                return;
        !           864:        }
        !           865:        area->counters.count += count;
        !           866:        area->counters.misses += item->misses;
        !           867:        area->counters.cycles += cycles;
        !           868: 
        !           869:        if (cycles == MAX_CPU_PROFILE_VALUE) {
        !           870:                area->overflow = true;
        !           871:        }
        !           872:        if (addr < area->lowest) {
        !           873:                area->lowest = addr;
        !           874:        }
        !           875:        area->highest = addr;
        !           876: 
        !           877:        area->active++;
        !           878: }
        !           879: 
        !           880: /**
        !           881:  * Helper for collecting CPU profile area statistics.
        !           882:  */
        !           883: static Uint32 update_area(profile_area_t *area, Uint32 start, Uint32 end)
        !           884: {
        !           885:        cpu_profile_item_t *item;
        !           886:        Uint32 addr;
        !           887: 
        !           888:        memset(area, 0, sizeof(profile_area_t));
        !           889:        area->lowest = cpu_profile.size;
        !           890: 
        !           891:        item = &(cpu_profile.data[start]);
        !           892:        for (addr = start; addr < end; addr++, item++) {
        !           893:                update_area_item(area, addr, item);
        !           894:        }
        !           895:        return addr;
        !           896: }
        !           897: 
        !           898: /**
        !           899:  * Helper for initializing CPU profile area sorting indexes.
        !           900:  */
        !           901: static Uint32* index_area(profile_area_t *area, Uint32 *sort_arr)
        !           902: {
        !           903:        cpu_profile_item_t *item;
        !           904:        Uint32 addr;
        !           905: 
        !           906:        item = &(cpu_profile.data[area->lowest]);
        !           907:        for (addr = area->lowest; addr <= area->highest; addr++, item++) {
        !           908:                if (item->count) {
        !           909:                        *sort_arr++ = addr;
        !           910:                }
        !           911:        }
        !           912:        return sort_arr;
        !           913: }
        !           914: 
        !           915: /**
        !           916:  * Stop and process the CPU profiling data; collect stats and
        !           917:  * prepare for more optimal sorting.
        !           918:  */
        !           919: void Profile_CpuStop(void)
        !           920: {
        !           921:        Uint32 *sort_arr, next;
        !           922:        int active;
        !           923: 
        !           924:        if (cpu_profile.processed || !cpu_profile.enabled) {
        !           925:                return;
        !           926:        }
        !           927:        /* user didn't change RAM or TOS size in the meanwhile? */
        !           928:        assert(cpu_profile.size == (STRamEnd + 0x20000 + TosSize) / 2);
        !           929: 
        !           930:        Profile_FinalizeCalls(&(cpu_callinfo), &(cpu_profile.all), Symbols_GetByCpuAddress);
        !           931: 
        !           932:        /* find lowest and highest addresses executed etc */
        !           933:        next = update_area(&cpu_profile.ram, 0, STRamEnd/2);
        !           934:        next = update_area(&cpu_profile.tos, next, (STRamEnd + TosSize)/2);
        !           935:        next = update_area(&cpu_profile.rom, next, cpu_profile.size);
        !           936:        assert(next == cpu_profile.size);
        !           937: 
        !           938: #if DEBUG
        !           939:        if (skip_assert) {
        !           940:                skip_assert = false;
        !           941:        } else
        !           942: #endif
        !           943:        {
        !           944:                assert(cpu_profile.all.misses == cpu_profile.ram.counters.misses + cpu_profile.tos.counters.misses + cpu_profile.rom.counters.misses);
        !           945:                assert(cpu_profile.all.cycles == cpu_profile.ram.counters.cycles + cpu_profile.tos.counters.cycles + cpu_profile.rom.counters.cycles);
        !           946:                assert(cpu_profile.all.count == cpu_profile.ram.counters.count + cpu_profile.tos.counters.count + cpu_profile.rom.counters.count);
        !           947:        }
        !           948: 
        !           949:        /* allocate address array for sorting */
        !           950:        active = cpu_profile.ram.active + cpu_profile.rom.active + cpu_profile.tos.active;
        !           951:        sort_arr = calloc(active, sizeof(*sort_arr));
        !           952: 
        !           953:        if (!sort_arr) {
        !           954:                perror("ERROR: allocating CPU profile address data");
        !           955:                free(cpu_profile.data);
        !           956:                cpu_profile.data = NULL;
        !           957:                return;
        !           958:        }
        !           959:        printf("Allocated CPU profile address buffer (%d KB).\n",
        !           960:               (int)sizeof(*sort_arr)*(active+512)/1024);
        !           961:        cpu_profile.sort_arr = sort_arr;
        !           962:        cpu_profile.active = active;
        !           963: 
        !           964:        /* and fill addresses for used instructions... */
        !           965:        sort_arr = index_area(&cpu_profile.ram, sort_arr);
        !           966:        sort_arr = index_area(&cpu_profile.tos, sort_arr);
        !           967:        sort_arr = index_area(&cpu_profile.rom, sort_arr);
        !           968:        assert(sort_arr == cpu_profile.sort_arr + cpu_profile.active);
        !           969:        //printf("%d/%d/%d\n", area->active, sort_arr-cpu_profile.sort_arr, active);
        !           970: 
        !           971:        Profile_CpuShowStats();
        !           972:        cpu_profile.processed = true;
        !           973: }
        !           974: 
        !           975: /**
        !           976:  * Get pointers to CPU profile enabling and disasm address variables
        !           977:  * for updating them (in parser).
        !           978:  */
        !           979: void Profile_CpuGetPointers(bool **enabled, Uint32 **disasm_addr)
        !           980: {
        !           981:        *disasm_addr = &cpu_profile.disasm_addr;
        !           982:        *enabled = &cpu_profile.enabled;
        !           983: }
        !           984: 
        !           985: /**
        !           986:  * Get callinfo & symbol search pointers for stack walking.
        !           987:  */
        !           988: void Profile_CpuGetCallinfo(callinfo_t **callinfo, const char* (**get_symbol)(Uint32))
        !           989: {
        !           990:        *callinfo = &(cpu_callinfo);
        !           991:        *get_symbol = Symbols_GetByCpuAddress;
        !           992: }

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.