Annotation of hatari/src/debug/profile.c, revision 1.1.1.7

1.1       root        1: /*
                      2:  * Hatari - profile.c
                      3:  * 
1.1.1.5   root        4:  * Copyright (C) 2010-2015 by Eero Tamminen
1.1       root        5:  *
1.1.1.3   root        6:  * This file is distributed under the GNU General Public License, version 2
                      7:  * or at your option any later version. Read the file gpl.txt for details.
1.1       root        8:  *
1.1.1.3   root        9:  * profile.c - profile caller info handling and debugger parsing functions
1.1       root       10:  */
                     11: const char Profile_fileid[] = "Hatari profile.c : " __DATE__ " " __TIME__;
                     12: 
                     13: #include <stdio.h>
1.1.1.3   root       14: #include <assert.h>
                     15: #include <inttypes.h>
1.1       root       16: #include "main.h"
1.1.1.3   root       17: #include "version.h"
                     18: #include "debugui.h"
1.1       root       19: #include "debug_priv.h"
1.1.1.3   root       20: #include "configuration.h"
                     21: #include "clocks_timings.h"
                     22: #include "evaluate.h"
1.1.1.7 ! root       23: #include "symbols.h"
1.1       root       24: #include "profile.h"
1.1.1.3   root       25: #include "profile_priv.h"
1.1       root       26: 
1.1.1.4   root       27: profile_loop_t profile_loop;
                     28: 
1.1       root       29: 
1.1.1.3   root       30: /* ------------------ CPU/DSP caller information handling ----------------- */
1.1       root       31: 
1.1.1.3   root       32: static const struct {
                     33:        char chr;
                     34:        calltype_t bit;
                     35:        const char *info;
                     36: } flaginfo[] = {
                     37:        { 'u', CALL_UNKNOWN,    "unknown PC change" },
                     38:        { 'n', CALL_NEXT,       "PC moved to next instruction", },
                     39:        { 'b', CALL_BRANCH,     "branch/jump" },
                     40:        { 's', CALL_SUBROUTINE, "subroutine call" },
                     41:        { 'r', CALL_SUBRETURN,  "return from subroutine" },
                     42:        { 'e', CALL_EXCEPTION,  "exception" },
                     43:        { 'x', CALL_EXCRETURN,  "return from exception" }
                     44: };
                     45: 
                     46: /**
                     47:  * compare function for qsort() to sort caller data by calls
                     48:  */
                     49: static int cmp_callers(const void *c1, const void *c2)
                     50: {
                     51:        Uint32 calls1 = ((const caller_t*)c1)->calls;
                     52:        Uint32 calls2 = ((const caller_t*)c2)->calls;
                     53:        if (calls1 > calls2) {
                     54:                return -1;
1.1       root       55:        }
1.1.1.3   root       56:        if (calls1 < calls2) {
                     57:                return 1;
1.1       root       58:        }
1.1.1.3   root       59:        return 0;
1.1       root       60: }
                     61: 
                     62: /**
1.1.1.3   root       63:  * output caller counter information
1.1       root       64:  */
1.1.1.3   root       65: static bool output_counter_info(FILE *fp, counters_t *counter)
1.1       root       66: {
1.1.1.3   root       67:        if (!counter->count) {
1.1       root       68:                return false;
                     69:        }
1.1.1.3   root       70:        /* number of calls needs to be first and rest must be in the same order as
                     71:         * they're in the profile disassembly (count of instructions, etc...).
                     72:         */
                     73:        fprintf(fp, " %"PRIu64"/%"PRIu64"/%"PRIu64"",
                     74:                counter->calls, counter->count, counter->cycles);
1.1.1.5   root       75:        if (counter->i_misses) {
1.1.1.3   root       76:                /* these are only with specific WinUAE CPU core */
1.1.1.5   root       77:                fprintf(fp, "/%"PRIu64"/%"PRIu64"",
                     78:                        counter->i_misses, counter->d_hits);
1.1.1.3   root       79:        }
                     80:        return true;
1.1       root       81: }
                     82: 
                     83: /**
1.1.1.3   root       84:  * output caller call counts, call type(s) and costs
1.1       root       85:  */
1.1.1.3   root       86: static void output_caller_info(FILE *fp, caller_t *info, Uint32 *typeaddr)
1.1       root       87: {
1.1.1.3   root       88:        int k, typecount;
                     89: 
                     90:        fprintf(fp, "0x%x = %d", info->addr, info->calls);
                     91:        if (info->flags) {      /* calltypes supported? */
                     92:                fputc(' ', fp);
                     93:                typecount = 0;
1.1.1.6   root       94:                for (k = 0; k < ARRAY_SIZE(flaginfo); k++) {
1.1.1.3   root       95:                        if (info->flags & flaginfo[k].bit) {
                     96:                                fputc(flaginfo[k].chr, fp);
                     97:                                typecount++;
                     98:                        }
                     99:                }
                    100:                if (typecount > 1) {
                    101:                        *typeaddr = info->addr;
                    102:                }
1.1       root      103:        }
1.1.1.3   root      104:        if (output_counter_info(fp, &(info->all))) {
                    105:                output_counter_info(fp, &(info->own));
                    106:                if (info->calls != info->own.calls) {
                    107:                        fprintf(stderr, "WARNING: mismatch between function 0x%x call count %d and own call cost %"PRIu64"!\n",
                    108:                               info->addr, info->calls, info->own.calls);
                    109:                }
1.1       root      110:        }
1.1.1.3   root      111:        fputs(", ", fp);
1.1       root      112: }
                    113: 
1.1.1.3   root      114: /*
                    115:  * Show collected CPU/DSP callee/caller information.
                    116:  *
                    117:  * Hint: As caller info list is based on number of loaded symbols,
                    118:  * load only text symbols to save memory & make things faster...
1.1       root      119:  */
1.1.1.3   root      120: void Profile_ShowCallers(FILE *fp, int sites, callee_t *callsite, const char * (*addr2name)(Uint32, Uint64 *))
1.1       root      121: {
1.1.1.3   root      122:        int i, j, countissues, countdiff;
                    123:        const char *name;
                    124:        caller_t *info;
                    125:        Uint64 total;
                    126:        Uint32 addr, typeaddr;
                    127: 
                    128:        /* legend */
                    129:        fputs("# <callee>: <caller1> = <calls> <types>[ <inclusive/totals>[ <exclusive/totals>]], <caller2> ..., <callee name>", fp);
                    130:        fputs("\n# types: ", fp);
1.1.1.6   root      131:        for (i = 0; i < ARRAY_SIZE(flaginfo); i++) {
1.1.1.3   root      132:                fprintf(fp, "%c = %s, ", flaginfo[i].chr, flaginfo[i].info);
                    133:        }
1.1.1.5   root      134:        fputs("\n# totals: calls/instructions/cycles/i-misses/d-hits\n", fp);
1.1.1.3   root      135: 
                    136:        countdiff = 0;
                    137:        countissues = 0;
                    138:        for (i = 0; i < sites; i++, callsite++) {
                    139:                addr = callsite->addr;
                    140:                if (!addr) {
                    141:                        continue;
                    142:                }
                    143:                name = addr2name(addr, &total);
                    144:                fprintf(fp, "0x%x: ", callsite->addr);
                    145: 
                    146:                typeaddr = 0;
                    147:                info = callsite->callers;
                    148:                qsort(info, callsite->count, sizeof(*info), cmp_callers);
                    149:                for (j = 0; j < callsite->count; j++, info++) {
                    150:                        if (!info->calls) {
                    151:                                break;
                    152:                        }
                    153:                        total -= info->calls;
                    154:                        output_caller_info(fp, info, &typeaddr);
                    155:                }
                    156:                if (name) {
                    157:                        fprintf(fp, "%s", name);
                    158:                }
                    159:                fputs("\n", fp);
                    160:                if (total) {
                    161: #if DEBUG
                    162:                        fprintf(stderr, "WARNING: %llu differences in call and instruction counts for '%s'!\n", total, name);
                    163: #endif
                    164:                        countdiff += total;
                    165:                        countissues++;
                    166:                }
                    167:                if (typeaddr) {
                    168:                        fprintf(stderr, "WARNING: different types of calls (at least) from 0x%x (to 0x%x),\n\t has its codechanged during profiling?\n",
                    169:                                typeaddr, callsite->addr);
                    170:                }
                    171:        }
                    172:        if (countissues) {
                    173:                if (countdiff <= 2 && countissues == countdiff) {
                    174:                        fprintf(stderr, "WARNING: callcount mismatches (%d calls) with address instruction\n\t counts in %d cases, most likely profile start & end.\n",
                    175:                                countdiff, countissues);
                    176:                } else {
                    177:                        /* profiler bug: some (address?) mismatch in recording instruction counts and call counts */
                    178:                        fprintf(stderr, "ERROR: callcount mismatches with address instruction counts\n\t(%d in total) detected in %d cases!\n",
                    179:                                countdiff, countissues);
                    180:                }
1.1       root      181:        }
                    182: }
                    183: 
                    184: 
                    185: /**
1.1.1.3   root      186:  * add second counter values to first counters
1.1       root      187:  */
1.1.1.3   root      188: static void add_counter_costs(counters_t *dst, counters_t *src)
1.1       root      189: {
1.1.1.3   root      190:        dst->calls += src->calls;
                    191:        dst->count += src->count;
                    192:        dst->cycles += src->cycles;
1.1.1.5   root      193:        dst->i_misses += src->i_misses;
                    194:        dst->d_hits += src->d_hits;
1.1       root      195: }
                    196: 
                    197: /**
1.1.1.3   root      198:  * set first counter values to their difference from a reference value
1.1       root      199:  */
1.1.1.3   root      200: static void set_counter_diff(counters_t *dst, counters_t *ref)
1.1       root      201: {
1.1.1.3   root      202:        dst->calls = ref->calls - dst->calls;
                    203:        dst->count = ref->count - dst->count;
                    204:        dst->cycles = ref->cycles - dst->cycles;
1.1.1.5   root      205:        dst->i_misses = ref->i_misses - dst->i_misses;
                    206:        dst->d_hits = ref->d_hits - dst->d_hits;
1.1       root      207: }
                    208: 
                    209: /**
1.1.1.3   root      210:  * add called (callee) function costs to caller information
1.1       root      211:  */
1.1.1.3   root      212: static void add_callee_cost(callee_t *callsite, callstack_t *stack)
1.1       root      213: {
1.1.1.3   root      214:        caller_t *info = callsite->callers;
                    215:        counters_t owncost;
                    216:        int i;
1.1       root      217: 
1.1.1.3   root      218:        for (i = 0; i < callsite->count; i++, info++) {
                    219:                if (info->addr == stack->caller_addr) {
                    220:                        /* own cost for callee is its child (out) costs
                    221:                         * deducted from full (all) costs
                    222:                         */
                    223:                        owncost = stack->out;
                    224:                        set_counter_diff(&owncost, &(stack->all));
                    225:                        add_counter_costs(&(info->own), &owncost);
                    226:                        add_counter_costs(&(info->all), &(stack->all));
                    227:                        return;
                    228:                }
1.1       root      229:        }
1.1.1.3   root      230:        /* cost is only added for updated callers,
                    231:         * so they should always exist
                    232:         */
                    233:        fprintf(stderr, "ERROR: trying to add costs to non-existing 0x%x caller of 0x%x!\n",
                    234:                stack->caller_addr, callsite->addr);
                    235:        assert(0);
1.1       root      236: }
                    237: 
                    238: 
1.1.1.3   root      239: static void add_caller(callee_t *callsite, Uint32 pc, Uint32 prev_pc, calltype_t flag)
1.1       root      240: {
1.1.1.3   root      241:        caller_t *info;
                    242:        int i, count;
1.1       root      243: 
1.1.1.3   root      244:        /* need to store real call addresses as symbols can change
                    245:         * after profiling has been stopped
                    246:         */
                    247:        info = callsite->callers;
                    248:        if (!info) {
                    249:                info = calloc(1, sizeof(*info));
                    250:                if (!info) {
                    251:                        fprintf(stderr, "ERROR: caller info alloc failed!\n");
                    252:                        return;
1.1       root      253:                }
1.1.1.3   root      254:                /* first call to this address, save address */
                    255:                callsite->addr = pc;
                    256:                callsite->callers = info;
                    257:                callsite->count = 1;
1.1       root      258:        }
1.1.1.3   root      259:        /* how many caller slots are currently allocated? */
                    260:        count = callsite->count;
                    261:        for (;;) {
                    262:                for (i = 0; i < count; i++, info++) {
                    263:                        if (info->addr == prev_pc) {
                    264:                                info->flags |= flag;
                    265:                                info->calls++;
                    266:                                return;
                    267:                        }
                    268:                        if (!info->addr) {
                    269:                                /* empty slot */
                    270:                                info->addr = prev_pc;
                    271:                                info->flags |= flag;
                    272:                                info->calls = 1;
                    273:                                return;
                    274:                        }
1.1       root      275:                }
1.1.1.3   root      276:                /* not enough, double caller slots */
                    277:                count *= 2;
                    278:                info = realloc(callsite->callers, count * sizeof(*info));
                    279:                if (!info) {
                    280:                        fprintf(stderr, "ERROR: caller info alloc failed!\n");
                    281:                        return;
1.1       root      282:                }
1.1.1.3   root      283:                memset(info + callsite->count, 0, callsite->count * sizeof(*info));
                    284:                callsite->callers = info;
                    285:                callsite->count = count;
1.1       root      286:        }
                    287: }
                    288: 
                    289: /**
1.1.1.3   root      290:  * Add information about called symbol, and if it was subroutine
                    291:  * call, add it to stack of functions which total costs are tracked.
                    292:  * callinfo.return_pc needs to be set before invoking this if the call
                    293:  * is of type CALL_SUBROUTINE.
1.1       root      294:  */
1.1.1.3   root      295: void Profile_CallStart(int idx, callinfo_t *callinfo, Uint32 prev_pc, calltype_t flag, Uint32 pc, counters_t *totalcost)
1.1       root      296: {
1.1.1.3   root      297:        callstack_t *stack;
                    298:        int count;
1.1       root      299: 
1.1.1.3   root      300:        if (unlikely(idx >= callinfo->sites)) {
                    301:                fprintf(stderr, "ERROR: number of symbols increased during profiling (%d > %d)!\n", idx, callinfo->sites);
1.1       root      302:                return;
                    303:        }
                    304: 
1.1.1.3   root      305:        add_caller(callinfo->site + idx, pc, prev_pc, flag);
1.1       root      306: 
1.1.1.3   root      307:        /* subroutine call which will return? */
                    308:        if (flag != CALL_SUBROUTINE) {
                    309:                /* no, some other call type */
1.1       root      310:                return;
                    311:        }
1.1.1.3   root      312:        /* yes, add it to call stack */
1.1       root      313: 
1.1.1.3   root      314:        if (unlikely(!callinfo->count)) {
                    315:                /* initial stack alloc, can be a bit larger */
                    316:                count = 8;
                    317:                stack = calloc(count, sizeof(*stack));
                    318:                if (!stack) {
                    319:                        fputs("ERROR: callstack alloc failed!\n", stderr);
                    320:                        return;
1.1       root      321:                }
1.1.1.3   root      322:                callinfo->stack = stack;
                    323:                callinfo->count = count;
1.1       root      324: 
1.1.1.3   root      325:        } else if (unlikely(callinfo->depth+1 >= callinfo->count)) {
                    326:                /* need to alloc more stack space for new call? */
                    327:                count = callinfo->count * 2;
                    328:                stack = realloc(callinfo->stack, count * sizeof(*stack));
                    329:                if (!stack) {
                    330:                        fputs("ERROR: callstack alloc failed!\n", stderr);
                    331:                        return;
1.1       root      332:                }
1.1.1.3   root      333:                memset(stack + callinfo->count, 0, callinfo->count * sizeof(*stack));
                    334:                callinfo->stack = stack;
                    335:                callinfo->count = count;
1.1       root      336:        }
                    337: 
1.1.1.3   root      338:        /* only first instruction can be undefined */
                    339:        assert(callinfo->return_pc != PC_UNDEFINED || !callinfo->depth);
1.1       root      340: 
1.1.1.3   root      341:        /* called function */
                    342:        stack = &(callinfo->stack[callinfo->depth++]);
1.1       root      343: 
1.1.1.3   root      344:        /* store current running totals & zero subcall costs */
                    345:        stack->all = *totalcost;
                    346:        memset(&(stack->out), 0, sizeof(stack->out));
1.1       root      347: 
1.1.1.3   root      348:        /* set subroutine call information */
                    349:        stack->ret_addr = callinfo->return_pc;
                    350:        stack->callee_idx = idx;
                    351:        stack->caller_addr = prev_pc;
                    352:        stack->callee_addr = pc;
1.1       root      353: 
1.1.1.3   root      354:        /* record call to this into costs... */
                    355:        totalcost->calls++;
1.1       root      356: }
                    357: 
                    358: /**
1.1.1.3   root      359:  * If it really was subcall (function) return, store returned function
                    360:  * costs and update callinfo->return_pc value.  Return address of
                    361:  * the instruction which did the returned call.
1.1       root      362:  */
1.1.1.3   root      363: Uint32 Profile_CallEnd(callinfo_t *callinfo, counters_t *totalcost)
1.1       root      364: {
1.1.1.3   root      365:        callstack_t *stack;
1.1       root      366: 
1.1.1.3   root      367:        assert(callinfo->depth);
1.1       root      368: 
1.1.1.3   root      369:        /* remove call info from stack */
                    370:        callinfo->depth--;
1.1       root      371: 
1.1.1.3   root      372:        /* callinfo->depth points now to to-be removed item */
                    373:        stack = &(callinfo->stack[callinfo->depth]);
1.1       root      374: 
1.1.1.3   root      375:        if (unlikely(stack->caller_addr == PC_UNDEFINED)) {
                    376:                /* return address can be undefined only for
                    377:                 * first profiled instruction, i.e. only for
                    378:                 * function at top of stack
                    379:                 */
                    380:                assert(!callinfo->depth);
                    381:        } else {
                    382:                /* full cost is original global cost (in ->all)
                    383:                 * deducted from current global (total) cost
                    384:                 */
                    385:                set_counter_diff(&(stack->all), totalcost);
                    386:                add_callee_cost(callinfo->site + stack->callee_idx, stack);
1.1       root      387:        }
                    388: 
1.1.1.3   root      389:        /* if current function had a parent:
                    390:         * - start tracking that
                    391:         * - add full cost of current function to parent's outside costs
                    392:         */
                    393:        if (callinfo->depth) {
                    394:                callstack_t *parent = stack - 1;
                    395:                callinfo->return_pc = parent->ret_addr;
                    396:                add_counter_costs(&(parent->out), &(stack->all));
                    397:        } else {
                    398:                callinfo->return_pc = PC_UNDEFINED;
1.1       root      399:        }
                    400: 
1.1.1.3   root      401:        /* where the returned function was called from */
                    402:        return stack->caller_addr;
                    403: }
1.1       root      404: 
                    405: /**
1.1.1.3   root      406:  * Add costs to all functions still in call stack
1.1       root      407:  */
1.1.1.7 ! root      408: void Profile_FinalizeCalls(callinfo_t *callinfo, counters_t *totalcost, const char* (*get_symbol)(Uint32, symtype_t))
1.1       root      409: {
1.1.1.3   root      410:        Uint32 addr;
                    411:        if (!callinfo->depth) {
1.1       root      412:                return;
                    413:        }
1.1.1.3   root      414:        fprintf(stderr, "Finalizing costs for %d non-returned functions:\n", callinfo->depth);
                    415:        while (callinfo->depth > 0) {
                    416:                Profile_CallEnd(callinfo, totalcost);
                    417:                addr = callinfo->stack[callinfo->depth].callee_addr;
1.1.1.7 ! root      418:                fprintf(stderr, "- 0x%x: %s (return = 0x%x)\n",
        !           419:                        addr, get_symbol(addr, SYMTYPE_TEXT),
1.1.1.3   root      420:                        callinfo->stack[callinfo->depth].ret_addr);
1.1       root      421:        }
                    422: }
                    423: 
                    424: /**
1.1.1.3   root      425:  * Show current profile stack
1.1       root      426:  */
1.1.1.3   root      427: static void Profile_ShowStack(bool forDsp)
1.1       root      428: {
1.1.1.3   root      429:        int i;
                    430:        Uint32 addr;
                    431:        callinfo_t *callinfo;
1.1.1.7 ! root      432:        const char* (*get_symbol)(Uint32, symtype_t);
1.1.1.3   root      433: 
                    434:        if (forDsp) {
                    435:                Profile_DspGetCallinfo(&callinfo, &get_symbol);
                    436:        } else {
                    437:                Profile_CpuGetCallinfo(&callinfo, &get_symbol);
1.1       root      438:        }
1.1.1.3   root      439:        if (!callinfo->depth) {
                    440:                fprintf(stderr, "Empty stack.\n");
                    441:                return;
1.1       root      442:        }
                    443: 
1.1.1.3   root      444:        for (i = 0; i < callinfo->depth; i++) {
                    445:                addr = callinfo->stack[i].callee_addr;
1.1.1.7 ! root      446:                fprintf(stderr, "- 0x%x: %s (return = 0x%x)\n",
        !           447:                        addr, get_symbol(addr, SYMTYPE_TEXT),
        !           448:                        callinfo->stack[i].ret_addr);
1.1       root      449:        }
                    450: }
                    451: 
                    452: /**
1.1.1.3   root      453:  * Allocate & set initial callinfo structure information
1.1       root      454:  */
1.1.1.3   root      455: int Profile_AllocCallinfo(callinfo_t *callinfo, int count, const char *name)
1.1       root      456: {
1.1.1.3   root      457:        callinfo->sites = count;
                    458:        if (count) {
                    459:                /* alloc & clear new data */
                    460:                callinfo->site = calloc(count, sizeof(callee_t));
                    461:                if (callinfo->site) {
                    462:                        printf("Allocated %s profile callsite buffer for %d symbols.\n", name, count);
                    463:                        callinfo->prev_pc = callinfo->return_pc = PC_UNDEFINED;
                    464:                } else {
                    465:                        fprintf(stderr, "ERROR: callesite buffer alloc failed!\n");
                    466:                        callinfo->sites = 0;
                    467:                }
1.1       root      468:        }
1.1.1.3   root      469:        return callinfo->sites;
1.1       root      470: }
                    471: 
                    472: /**
1.1.1.3   root      473:  * Free all callinfo structure information
1.1       root      474:  */
1.1.1.3   root      475: void Profile_FreeCallinfo(callinfo_t *callinfo)
1.1       root      476: {
1.1.1.3   root      477:        int i;
                    478:        if (callinfo->sites) {
                    479:                callee_t *site = callinfo->site;
                    480:                for (i = 0; i < callinfo->sites; i++, site++) {
                    481:                        if (site->callers) {
                    482:                                free(site->callers);
                    483:                        }
1.1       root      484:                }
1.1.1.3   root      485:                free(callinfo->site);
                    486:                if (callinfo->stack) {
                    487:                        free(callinfo->stack);
                    488:                }
                    489:                memset(callinfo, 0, sizeof(*callinfo));
1.1       root      490:        }
                    491: }
                    492: 
                    493: 
                    494: /* ------------------- command parsing ---------------------- */
                    495: 
                    496: /**
                    497:  * Readline match callback to list profile subcommand names.
                    498:  * STATE = 0 -> different text from previous one.
                    499:  * Return next match or NULL if no matches.
                    500:  */
                    501: char *Profile_Match(const char *text, int state)
                    502: {
                    503:        static const char *names[] = {
1.1.1.5   root      504:                "addresses", "callers", "caches", "counts", "cycles", "d-hits", "i-misses",
                    505:                "loops", "off", "on", "save", "stack", "stats", "symbols"
1.1       root      506:        };
1.1.1.6   root      507:        return DebugUI_MatchHelper(names, ARRAY_SIZE(names), text, state);
1.1       root      508: }
                    509: 
                    510: const char Profile_Description[] =
1.1.1.4   root      511:        "<subcommand> [parameter]\n"
                    512:        "\n"
                    513:        "\tSubcommands:\n"
                    514:        "\t- on\n"
                    515:        "\t- off\n"
                    516:        "\t- counts [count]\n"
                    517:        "\t- cycles [count]\n"
1.1.1.5   root      518:        "\t- i-misses [count]\n"
                    519:        "\t- d-hits [count]\n"
1.1.1.4   root      520:        "\t- symbols [count]\n"
                    521:        "\t- addresses [address]\n"
                    522:        "\t- callers\n"
1.1.1.5   root      523:        "\t- caches\n"
1.1.1.4   root      524:        "\t- stack\n"
                    525:        "\t- stats\n"
                    526:        "\t- save <file>\n"
                    527:        "\t- loops <file> [CPU limit] [DSP limit]\n"
                    528:        "\n"
1.1.1.3   root      529:        "\t'on' & 'off' enable and disable profiling.  Data is collected\n"
                    530:        "\tuntil debugger is entered again at which point you get profiling\n"
                    531:        "\tstatistics ('stats') summary.\n"
                    532:        "\n"
                    533:        "\tThen you can ask for list of the PC addresses, sorted either by\n"
1.1.1.5   root      534:        "\texecution 'counts', used 'cycles', i-cache misses or d-cache hits.\n"
                    535:        "\tFirst can be limited just to named addresses with 'symbols'.\n"
                    536:        "\tOptional count will limit how many items will be shown.\n"
                    537:        "\n"
1.1.1.6   root      538:        "\t'caches' shows histogram of CPU cache usage.\n"
1.1.1.3   root      539:        "\n"
                    540:        "\t'addresses' lists the profiled addresses in order, with the\n"
                    541:        "\tinstructions (currently) residing at them.  By default this\n"
                    542:        "\tstarts from the first executed instruction, or you can\n"
                    543:        "\tspecify the starting address.\n"
                    544:        "\n"
                    545:        "\t'callers' shows (raw) caller information for addresses which\n"
1.1.1.4   root      546:        "\thad symbol(s) associated with them.  'stack' shows the current\n"
                    547:        "\tprofile stack (this is useful only with :noinit breakpoints).\n"
1.1.1.3   root      548:        "\n"
1.1.1.4   root      549:        "\tProfile address and callers information can be saved with\n"
                    550:        "\t'save' command.\n"
                    551:        "\n"
                    552:        "\tDetailed (spin) looping information can be collected by\n"
                    553:        "\tspecifying to which file it should be saved, with optional\n"
                    554:        "\tlimit(s) on how many bytes first and last instruction\n"
                    555:        "\taddress of the loop can differ (0 = no limit).";
1.1       root      556: 
                    557: 
                    558: /**
1.1.1.3   root      559:  * Save profiling information for CPU or DSP.
                    560:  */
                    561: static bool Profile_Save(const char *fname, bool bForDsp)
                    562: {
                    563:        FILE *out;
                    564:        Uint32 freq;
                    565:        const char *proc, *core;
                    566:        if (!(out = fopen(fname, "w"))) {
                    567:                fprintf(stderr, "ERROR: opening '%s' for writing failed!\n", fname);
                    568:                perror(NULL);
                    569:                return false;
                    570:        }
                    571:        if (bForDsp) {
                    572:                freq = MachineClocks.DSP_Freq;
                    573:                proc = "DSP";
                    574:        } else {
1.1.1.7 ! root      575:                freq = MachineClocks.CPU_Freq_Emul;
1.1.1.3   root      576:                proc = "CPU";
                    577:        }
                    578: #if ENABLE_WINUAE_CPU
                    579:        core = "WinUAE";
                    580: #else
                    581:        core = "OldUAE";
                    582: #endif
                    583:        fprintf(out, "Hatari %s profile (%s, %s CPU core)\n", proc, PROG_NAME, core);
                    584:        fprintf(out, "Cycles/second:\t%u\n", freq);
                    585:        if (bForDsp) {
                    586:                Profile_DspSave(out);
                    587:        } else {
                    588:                Profile_CpuSave(out);
                    589:        }
                    590:        fclose(out);
                    591:        return true;
                    592: }
                    593: 
                    594: /**
1.1.1.4   root      595:  * function CPU & DSP profiling functionality can call to
                    596:  * reset loop information log by truncating it.  Only portable
                    597:  * way to do that is re-opening it again.
                    598:  */
                    599: bool Profile_LoopReset(void)
                    600: {
                    601:        if (!profile_loop.filename) {
                    602:                return false;
                    603:        }
                    604:        if (profile_loop.fp) {
                    605:                fclose(profile_loop.fp);
                    606:        }
                    607:        profile_loop.fp = fopen(profile_loop.filename, "w");
                    608:        if (!profile_loop.fp) {
                    609:                return false;
                    610:        }
                    611:        fprintf(profile_loop.fp, "# <processor> <VBLs from boot> <address> <size> <loops>\n");
                    612:        return true;
                    613: }
                    614: 
                    615: /**
                    616:  * Open file common to both CPU and DSP profiling.
                    617:  */
                    618: static bool Profile_Loops(int nArgc, char *psArgs[])
                    619: {
                    620:        if (nArgc > 2) {
                    621:                /* check that the given file can be opened for writing */
                    622:                if (profile_loop.filename) {
                    623:                        free(profile_loop.filename);
                    624:                }
                    625:                profile_loop.filename = strdup(psArgs[2]);
                    626:                if (Profile_LoopReset()) {
                    627:                        if (nArgc > 3) {
                    628:                                profile_loop.cpu_limit = atoi(psArgs[3]);
                    629:                                if (nArgc > 4) {
                    630:                                        profile_loop.dsp_limit = atoi(psArgs[4]);
                    631:                                }
                    632:                        }
                    633:                        fprintf(stderr, "Additional max %d (CPU) & %d (DSP) byte loop profiling enabled to:\n\t%s\n",
                    634:                                profile_loop.cpu_limit, profile_loop.cpu_limit, psArgs[2]);
                    635:                } else {
                    636:                        free(profile_loop.filename);
                    637:                        profile_loop.filename = NULL;
                    638:                        perror("ERROR: opening profile loop output file failed, disabling!");
                    639:                        return false;
                    640:                }
                    641:        } else {
                    642:                if (profile_loop.fp) {
                    643:                        fprintf(stderr, "Disabling loop profiling.\n");
                    644:                        free(profile_loop.filename);
                    645:                        profile_loop.filename = NULL;
                    646:                        fclose(profile_loop.fp);
                    647:                        profile_loop.fp = NULL;
1.1.1.7 ! root      648:                } else {
        !           649:                        fprintf(stderr, "ERROR: no file name for saving the loop profiling information.\n");
1.1.1.4   root      650:                }
                    651:        }
                    652:        return true;
                    653: }
                    654: 
                    655: /**
1.1       root      656:  * Command: CPU/DSP profiling enabling, exec stats, cycle and call stats.
1.1.1.3   root      657:  * Returns DEBUGGER_CMDDONE or DEBUGGER_CMDCONT.
1.1       root      658:  */
1.1.1.3   root      659: int Profile_Command(int nArgc, char *psArgs[], bool bForDsp)
1.1       root      660: {
                    661:        static int show = 16;
1.1.1.3   root      662:        Uint32 *disasm_addr;
1.1       root      663:        bool *enabled;
1.1.1.3   root      664: 
1.1       root      665:        if (nArgc > 2) {
                    666:                show = atoi(psArgs[2]);
1.1.1.4   root      667:        }
1.1       root      668:        if (bForDsp) {
1.1.1.3   root      669:                Profile_DspGetPointers(&enabled, &disasm_addr);
1.1       root      670:        } else {
1.1.1.3   root      671:                Profile_CpuGetPointers(&enabled, &disasm_addr);
1.1       root      672:        }
1.1.1.3   root      673: 
                    674:        /* continue or explicit addresses command? */
                    675:        if (nArgc < 2 || strcmp(psArgs[1], "addresses") == 0) {
                    676:                Uint32 lower, upper = 0;
                    677:                if (nArgc > 2) {
                    678:                        if (Eval_Range(psArgs[2], &lower, &upper, false) < 0) {
                    679:                                return DEBUGGER_CMDDONE;
                    680:                        }
                    681:                } else {
                    682:                        lower = *disasm_addr;
                    683:                }
                    684:                if (bForDsp) {
1.1.1.7 ! root      685:                        *disasm_addr = Profile_DspShowAddresses(lower, upper, stdout, PAGING_ENABLED);
1.1.1.3   root      686:                } else {
1.1.1.7 ! root      687:                        *disasm_addr = Profile_CpuShowAddresses(lower, upper, stdout, PAGING_ENABLED);
1.1.1.3   root      688:                }
                    689:                return DEBUGGER_CMDCONT;
                    690: 
                    691:        } else if (strcmp(psArgs[1], "on") == 0) {
1.1       root      692:                *enabled = true;
                    693:                fprintf(stderr, "Profiling enabled.\n");
1.1.1.3   root      694: 
                    695:        } else if (strcmp(psArgs[1], "off") == 0) {
1.1       root      696:                *enabled = false;
                    697:                fprintf(stderr, "Profiling disabled.\n");
                    698:        
1.1.1.3   root      699:        } else if (strcmp(psArgs[1], "stats") == 0) {
1.1       root      700:                if (bForDsp) {
                    701:                        Profile_DspShowStats();
                    702:                } else {
                    703:                        Profile_CpuShowStats();
                    704:                }
1.1.1.5   root      705:        } else if (strcmp(psArgs[1], "i-misses") == 0) {
                    706:                if (bForDsp) {
                    707:                        fprintf(stderr, "Cache information is recorded only for CPU, not DSP.\n");
                    708:                } else {
                    709:                        Profile_CpuShowInstrMisses(show);
                    710:                }
                    711:        } else if (strcmp(psArgs[1], "d-hits") == 0) {
                    712:                if (bForDsp) {
                    713:                        fprintf(stderr, "Cache information is recorded only for CPU, not DSP.\n");
                    714:                } else {
                    715:                        Profile_CpuShowDataHits(show);
                    716:                }
                    717:        } else if (strcmp(psArgs[1], "caches") == 0) {
1.1.1.3   root      718:                if (bForDsp) {
1.1.1.5   root      719:                        fprintf(stderr, "Cache information is recorded only for CPU, not DSP.\n");
1.1.1.3   root      720:                } else {
1.1.1.5   root      721:                        Profile_CpuShowCaches();
1.1.1.3   root      722:                }
1.1       root      723:        } else if (strcmp(psArgs[1], "cycles") == 0) {
                    724:                if (bForDsp) {
                    725:                        Profile_DspShowCycles(show);
                    726:                } else {
                    727:                        Profile_CpuShowCycles(show);
                    728:                }
                    729:        } else if (strcmp(psArgs[1], "counts") == 0) {
                    730:                if (bForDsp) {
                    731:                        Profile_DspShowCounts(show, false);
                    732:                } else {
                    733:                        Profile_CpuShowCounts(show, false);
                    734:                }
1.1.1.3   root      735:        } else if (strcmp(psArgs[1], "symbols") == 0) {
1.1       root      736:                if (bForDsp) {
                    737:                        Profile_DspShowCounts(show, true);
                    738:                } else {
                    739:                        Profile_CpuShowCounts(show, true);
                    740:                }
1.1.1.3   root      741:        } else if (strcmp(psArgs[1], "callers") == 0) {
                    742:                if (bForDsp) {
                    743:                        Profile_DspShowCallers(stdout);
                    744:                } else {
                    745:                        Profile_CpuShowCallers(stdout);
                    746:                }
                    747:        } else if (strcmp(psArgs[1], "stack") == 0) {
                    748:                Profile_ShowStack(bForDsp);
1.1.1.4   root      749: 
1.1.1.3   root      750:        } else if (strcmp(psArgs[1], "save") == 0) {
                    751:                Profile_Save(psArgs[2], bForDsp);
1.1.1.4   root      752: 
                    753:        } else if (strcmp(psArgs[1], "loops") == 0) {
                    754:                Profile_Loops(nArgc, psArgs);
                    755: 
1.1       root      756:        } else {
                    757:                DebugUI_PrintCmdHelp(psArgs[0]);
                    758:        }
1.1.1.3   root      759:        return DEBUGGER_CMDDONE;
1.1       root      760: }

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.