Annotation of hatari/src/debug/profiledsp.c, revision 1.1

1.1     ! root        1: /*
        !             2:  * Hatari - profiledsp.c
        !             3:  * 
        !             4:  * Copyright (C) 2010-2013 by Eero Tamminen
        !             5:  *
        !             6:  * This file is distributed under the GNU General Public License, version 2
        !             7:  * or at your option any later version. Read the file gpl.txt for details.
        !             8:  *
        !             9:  * profiledsp.c - functions for profiling DSP and showing the results.
        !            10:  */
/* Identification string for this source file; embeds the build date and time. */
const char Profiledsp_fileid[] = "Hatari profiledsp.c : " __DATE__ " " __TIME__;
        !            12: 
        !            13: #include <stdio.h>
        !            14: #include <inttypes.h>
        !            15: #include <assert.h>
        !            16: #include "main.h"
        !            17: #include "configuration.h"
        !            18: #include "clocks_timings.h"
        !            19: #include "dsp.h"
        !            20: #include "profile.h"
        !            21: #include "profile_priv.h"
        !            22: #include "symbols.h"
        !            23: 
/* DSP call tracking state: updated by collect_calls(), reported by
 * Profile_DspShowCallers() and reset/reallocated in Profile_DspStart().
 */
static callinfo_t dsp_callinfo;

/* number of items in the profile data array, one per address in 0x0-0xFFFF */
#define DSP_PROFILE_ARR_SIZE 0x10000
/* saturation value for the 64-bit counters; a counter having
 * this value is reported as overflowed in the listings
 */
#define MAX_DSP_PROFILE_VALUE 0xFFFFFFFFFFFFFFFFLL
        !            28: 
/* Profile counters kept for a single DSP address. */
typedef struct {
       Uint64 count;           /* how many times this address is used */
       Uint64 cycles;          /* how many DSP cycles was taken at this address */
       /* Set to 0xFFFF by Profile_DspStart(); presumably the smallest
        * cycle count seen for one execution -- TODO confirm against
        * the full Profile_DspUpdate().
        */
       Uint16 min_cycle;
       /* Zero until set; when non-zero, Profile_DspAddressData()
        * reports max_cycle - min_cycle as the cycle difference.
        */
       Uint16 max_cycle;
} dsp_profile_item_t;
        !            35: 
/* State of the DSP profiler: the per-address data buffer, summary
 * statistics and bookkeeping needed between the profiling calls.
 */
static struct {
       dsp_profile_item_t *data; /* profile data, DSP_PROFILE_ARR_SIZE items, NULL when not allocated */
       profile_area_t ram;   /* statistics for whole memory */
       Uint16 *sort_arr;     /* data indexes used for sorting, qsort()ed by the Show functions */
       Uint16 prev_pc;       /* previous PC for which the cycles are for */
       Uint32 disasm_addr;   /* 'dspaddresses' command start address */
       bool processed;       /* true when data is already processed */
       bool enabled;         /* true when profiling enabled */
} dsp_profile;
        !            45: 
        !            46: 
        !            47: /* ------------------ DSP profile results ----------------- */
        !            48: 
        !            49: /**
        !            50:  * Get DSP cycles, count and count percentage for given address.
        !            51:  * Return true if data was available and non-zero, false otherwise.
        !            52:  */
        !            53: bool Profile_DspAddressData(Uint16 addr, float *percentage, Uint64 *count, Uint64 *cycles, Uint16 *cycle_diff)
        !            54: {
        !            55:        dsp_profile_item_t *item;
        !            56:        if (!dsp_profile.data) {
        !            57:                return false;
        !            58:        }
        !            59:        item = dsp_profile.data + addr;
        !            60: 
        !            61:        *cycles = item->cycles;
        !            62:        *count = item->count;
        !            63:        if (item->max_cycle) {
        !            64:                *cycle_diff = item->max_cycle - item->min_cycle;
        !            65:        } else {
        !            66:                *cycle_diff = 0;
        !            67:        }
        !            68:        if (dsp_profile.ram.counters.count) {
        !            69:                *percentage = 100.0*(*count)/dsp_profile.ram.counters.count;
        !            70:        } else {
        !            71:                *percentage = 0.0;
        !            72:        }
        !            73:        return (*count > 0);
        !            74: }
        !            75: 
        !            76: /**
        !            77:  * show DSP specific profile statistics.
        !            78:  */
        !            79: void Profile_DspShowStats(void)
        !            80: {
        !            81:        profile_area_t *area = &dsp_profile.ram;
        !            82:        fprintf(stderr, "DSP profile statistics (0x0-0xFFFF):\n");
        !            83:        if (!area->active) {
        !            84:                fprintf(stderr, "- no activity\n");
        !            85:                return;
        !            86:        }
        !            87:        fprintf(stderr, "- active address range:\n  0x%04x-0x%04x\n",
        !            88:                area->lowest, area->highest);
        !            89:        fprintf(stderr, "- active instruction addresses:\n  %d\n",
        !            90:                area->active);
        !            91:        fprintf(stderr, "- executed instructions:\n  %"PRIu64"\n",
        !            92:                area->counters.count);
        !            93:        /* indicates either instruction(s) that address different memory areas
        !            94:         * (they can have different access costs), or more significantly,
        !            95:         * DSP code that has changed during profiling.
        !            96:         */
        !            97:        fprintf(stderr, "- sum of per instruction cycle changes\n"
        !            98:                "  (can indicate code change during profiling):\n  %"PRIu64"\n",
        !            99:                area->counters.misses);
        !           100: 
        !           101:        fprintf(stderr, "- used cycles:\n  %"PRIu64"\n",
        !           102:                area->counters.cycles);
        !           103:        if (area->overflow) {
        !           104:                fprintf(stderr, "  *** COUNTERS OVERFLOW! ***\n");
        !           105:        }
        !           106:        fprintf(stderr, "\n= %.5fs\n", (double)(area->counters.cycles) / MachineClocks.DSP_Freq);
        !           107: }
        !           108: 
        !           109: /**
        !           110:  * Show DSP instructions which execution was profiled, in the address order,
        !           111:  * starting from the given address.  Return next disassembly address.
        !           112:  */
        !           113: Uint16 Profile_DspShowAddresses(Uint32 addr, Uint32 upper, FILE *out)
        !           114: {
        !           115:        int show, shown, active;
        !           116:        dsp_profile_item_t *data;
        !           117:        Uint16 nextpc;
        !           118:        Uint32 end;
        !           119:        const char *symbol;
        !           120: 
        !           121:        data = dsp_profile.data;
        !           122:        if (!data) {
        !           123:                fprintf(stderr, "ERROR: no DSP profiling data available!\n");
        !           124:                return 0;
        !           125:        }
        !           126: 
        !           127:        end = DSP_PROFILE_ARR_SIZE;
        !           128:        active = dsp_profile.ram.active;
        !           129:        show = ConfigureParams.Debugger.nDisasmLines;
        !           130:        if (upper) {
        !           131:                if (upper < end) {
        !           132:                        end = upper;
        !           133:                }
        !           134:                show = active;
        !           135:        } else {
        !           136:                show = ConfigureParams.Debugger.nDisasmLines;
        !           137:                if (!show || show > active) {
        !           138:                        show = active;
        !           139:                }
        !           140:        }
        !           141: 
        !           142:        fputs("# disassembly with profile data: <instructions percentage>% (<sum of instructions>, <sum of cycles>, <max cycle difference>)\n", out);
        !           143: 
        !           144:        nextpc = 0;
        !           145:        for (shown = 0; shown < show && addr < end; addr++) {
        !           146:                if (!data[addr].count) {
        !           147:                        continue;
        !           148:                }
        !           149:                if (addr != nextpc && nextpc) {
        !           150:                        fputs("[...]\n", out);
        !           151:                }
        !           152:                symbol = Symbols_GetByDspAddress(addr);
        !           153:                if (symbol) {
        !           154:                        fprintf(out, "%s:\n", symbol);
        !           155:                }
        !           156:                nextpc = DSP_DisasmAddress(out, addr, addr);
        !           157:                shown++;
        !           158:        }
        !           159:        printf("Disassembled %d (of active %d) DSP addresses.\n", shown, active);
        !           160:        return nextpc;
        !           161: }
        !           162: 
        !           163: /**
        !           164:  * compare function for qsort() to sort DSP profile data by descdending
        !           165:  * address cycles counts.
        !           166:  */
        !           167: static int cmp_dsp_cycles(const void *p1, const void *p2)
        !           168: {
        !           169:        Uint64 count1 = dsp_profile.data[*(const Uint16*)p1].cycles;
        !           170:        Uint64 count2 = dsp_profile.data[*(const Uint16*)p2].cycles;
        !           171:        if (count1 > count2) {
        !           172:                return -1;
        !           173:        }
        !           174:        if (count1 < count2) {
        !           175:                return 1;
        !           176:        }
        !           177:        return 0;
        !           178: }
        !           179: 
        !           180: /**
        !           181:  * Sort DSP profile data addresses by cycle counts and show the results.
        !           182:  */
        !           183: void Profile_DspShowCycles(int show)
        !           184: {
        !           185:        int active;
        !           186:        Uint16 *sort_arr, *end, addr;
        !           187:        dsp_profile_item_t *data = dsp_profile.data;
        !           188:        float percentage;
        !           189:        Uint64 count;
        !           190: 
        !           191:        if (!data) {
        !           192:                fprintf(stderr, "ERROR: no DSP profiling data available!\n");
        !           193:                return;
        !           194:        }
        !           195: 
        !           196:        active = dsp_profile.ram.active;
        !           197:        sort_arr = dsp_profile.sort_arr;
        !           198:        qsort(sort_arr, active, sizeof(*sort_arr), cmp_dsp_cycles);
        !           199: 
        !           200:        printf("addr:\tcycles:\n");
        !           201:        show = (show < active ? show : active);
        !           202:        for (end = sort_arr + show; sort_arr < end; sort_arr++) {
        !           203:                addr = *sort_arr;
        !           204:                count = data[addr].cycles;
        !           205:                percentage = 100.0*count/dsp_profile.ram.counters.cycles;
        !           206:                printf("0x%04x\t%5.2f%%\t%"PRIu64"%s\n", addr, percentage, count,
        !           207:                       count == MAX_DSP_PROFILE_VALUE ? " (OVERFLOW)" : "");
        !           208:        }
        !           209:        printf("%d DSP addresses listed.\n", show);
        !           210: }
        !           211: 
        !           212: 
        !           213: /**
        !           214:  * compare function for qsort() to sort DSP profile data by descdending
        !           215:  * address access counts.
        !           216:  */
        !           217: static int cmp_dsp_count(const void *p1, const void *p2)
        !           218: {
        !           219:        Uint64 count1 = dsp_profile.data[*(const Uint16*)p1].count;
        !           220:        Uint64 count2 = dsp_profile.data[*(const Uint16*)p2].count;
        !           221:        if (count1 > count2) {
        !           222:                return -1;
        !           223:        }
        !           224:        if (count1 < count2) {
        !           225:                return 1;
        !           226:        }
        !           227:        return 0;
        !           228: }
        !           229: 
        !           230: /**
        !           231:  * Sort DSP profile data addresses by call counts and show the results.
        !           232:  * If symbols are requested and symbols are loaded, show (only) addresses
        !           233:  * matching a symbol.
        !           234:  */
        !           235: void Profile_DspShowCounts(int show, bool only_symbols)
        !           236: {
        !           237:        dsp_profile_item_t *data = dsp_profile.data;
        !           238:        int symbols, matched, active;
        !           239:        Uint16 *sort_arr, *end, addr;
        !           240:        const char *name;
        !           241:        float percentage;
        !           242:        Uint64 count;
        !           243: 
        !           244:        if (!data) {
        !           245:                fprintf(stderr, "ERROR: no DSP profiling data available!\n");
        !           246:                return;
        !           247:        }
        !           248:        active = dsp_profile.ram.active;
        !           249:        show = (show < active ? show : active);
        !           250: 
        !           251:        sort_arr = dsp_profile.sort_arr;
        !           252:        qsort(sort_arr, active, sizeof(*sort_arr), cmp_dsp_count);
        !           253: 
        !           254:        if (!only_symbols) {
        !           255:                printf("addr:\tcount:\n");
        !           256:                for (end = sort_arr + show; sort_arr < end; sort_arr++) {
        !           257:                        addr = *sort_arr;
        !           258:                        count = data[addr].count;
        !           259:                        percentage = 100.0*count/dsp_profile.ram.counters.count;
        !           260:                        printf("0x%04x\t%5.2f%%\t%"PRIu64"%s\n",
        !           261:                               addr, percentage, count,
        !           262:                               count == MAX_DSP_PROFILE_VALUE ? " (OVERFLOW)" : "");
        !           263:                }
        !           264:                printf("%d DSP addresses listed.\n", show);
        !           265:                return;
        !           266:        }
        !           267: 
        !           268:        symbols = Symbols_DspCount();
        !           269:        if (!symbols) {
        !           270:                fprintf(stderr, "ERROR: no DSP symbols loaded!\n");
        !           271:                return;
        !           272:        }
        !           273:        matched = 0;    
        !           274: 
        !           275:        printf("addr:\tcount:\t\tsymbol:\n");
        !           276:        for (end = sort_arr + active; sort_arr < end; sort_arr++) {
        !           277: 
        !           278:                addr = *sort_arr;
        !           279:                name = Symbols_GetByDspAddress(addr);
        !           280:                if (!name) {
        !           281:                        continue;
        !           282:                }
        !           283:                count = data[addr].count;
        !           284:                percentage = 100.0*count/dsp_profile.ram.counters.count;
        !           285:                printf("0x%04x\t%.2f%%\t%"PRIu64"\t%s%s\n",
        !           286:                       addr, percentage, count, name,
        !           287:                       count == MAX_DSP_PROFILE_VALUE ? " (OVERFLOW)" : "");
        !           288: 
        !           289:                matched++;
        !           290:                if (matched >= show || matched >= symbols) {
        !           291:                        break;
        !           292:                }
        !           293:        }
        !           294:        printf("%d DSP symbols listed.\n", matched);
        !           295: }
        !           296: 
        !           297: 
        !           298: static const char * addr2name(Uint32 addr, Uint64 *total)
        !           299: {
        !           300:        *total = dsp_profile.data[addr].count;
        !           301:        return Symbols_GetByDspAddress(addr);
        !           302: }
        !           303: 
/**
 * Output DSP callers info to given file.
 *
 * Hands the collected DSP call sites to the generic
 * Profile_ShowCallers(), with addr2name() as the callback
 * used for resolving the addresses.
 */
void Profile_DspShowCallers(FILE *fp)
{
       Profile_ShowCallers(fp, dsp_callinfo.sites, dsp_callinfo.site, addr2name);
}
        !           311: 
        !           312: /**
        !           313:  * Save DSP profile information to given file.
        !           314:  */
        !           315: void Profile_DspSave(FILE *out)
        !           316: {
        !           317:        /* Comma separated descriptions for the profile disassembly data fields.
        !           318:         * Instructions and cycles need to be first two fields!
        !           319:         */
        !           320:        fputs("Field names:\tExecuted instructions, Used cycles, Largest cycle differences (= code changes during profiling)\n", out);
        !           321:        /* (Python) pegexp that matches address and all describled fields from disassembly:
        !           322:         * <space>:<address> <opcodes> (<instr cycles>) <instr> <count>% (<count>, <cycles>)
        !           323:         * p:0202  0aa980 000200  (07 cyc)  jclr #0,x:$ffe9,p:$0200  0.00% (6, 42)
        !           324:         */
        !           325:        fputs("Field regexp:\t^p:([0-9a-f]+) .*% \\((.*)\\)$\n", out);
        !           326:        Profile_DspShowAddresses(0, DSP_PROFILE_ARR_SIZE, out);
        !           327:        Profile_DspShowCallers(out);
        !           328: }
        !           329: 
        !           330: /* ------------------ DSP profile control ----------------- */
        !           331: 
        !           332: /**
        !           333:  * Initialize DSP profiling when necessary.  Return true if profiling.
        !           334:  */
        !           335: bool Profile_DspStart(void)
        !           336: {
        !           337:        dsp_profile_item_t *item;
        !           338:        int i;
        !           339: 
        !           340:        Profile_FreeCallinfo(&(dsp_callinfo));
        !           341:        if (dsp_profile.sort_arr) {
        !           342:                /* remove previous results */
        !           343:                free(dsp_profile.sort_arr);
        !           344:                free(dsp_profile.data);
        !           345:                dsp_profile.sort_arr = NULL;
        !           346:                dsp_profile.data = NULL;
        !           347:                printf("Freed previous DSP profile buffers.\n");
        !           348:        }
        !           349:        if (!dsp_profile.enabled) {
        !           350:                return false;
        !           351:        }
        !           352:        /* zero everything */
        !           353:        memset(&dsp_profile, 0, sizeof(dsp_profile));
        !           354: 
        !           355:        dsp_profile.data = calloc(DSP_PROFILE_ARR_SIZE, sizeof(*dsp_profile.data));
        !           356:        if (!dsp_profile.data) {
        !           357:                perror("ERROR, new DSP profile buffer alloc failed");
        !           358:                return false;
        !           359:        }
        !           360:        printf("Allocated DSP profile buffer (%d KB).\n",
        !           361:               (int)sizeof(*dsp_profile.data)*DSP_PROFILE_ARR_SIZE/1024);
        !           362: 
        !           363:        Profile_AllocCallinfo(&(dsp_callinfo), Symbols_DspCount(), "DSP");
        !           364: 
        !           365:        item = dsp_profile.data;
        !           366:        for (i = 0; i < DSP_PROFILE_ARR_SIZE; i++, item++) {
        !           367:                item->min_cycle = 0xFFFF;
        !           368:        }
        !           369:        dsp_profile.prev_pc = DSP_GetPC();
        !           370: 
        !           371:        dsp_profile.disasm_addr = 0;
        !           372:        dsp_profile.processed = false;
        !           373:        dsp_profile.enabled = true;
        !           374:        return dsp_profile.enabled;
        !           375: }
        !           376: 
        !           377: /* return true if pc is next instruction for previous pc */
        !           378: static bool is_prev_instr(Uint16 prev_pc, Uint16 pc)
        !           379: {
        !           380:        /* just moved to next instruction (1-2 words)? */
        !           381:        if (prev_pc < pc && (pc - prev_pc) <= 4) {
        !           382:                return true;
        !           383:        }
        !           384:        return false;
        !           385: }
        !           386: 
/**
 * Return branch type based on caller instruction type.
 *
 * The (24-bit) opcode of the instruction at 'prev_pc' is read from
 * the DSP P memory and matched against the bit patterns given in
 * the comments next to the masks.  'pc' is used only for checking
 * whether execution just continued to the following instruction.
 *
 * Order of the checks matters: returns and subroutine calls are
 * recognized before the more generic branch patterns, and CALL_NEXT /
 * CALL_UNKNOWN are the fallbacks when nothing else matches.
 */
static calltype_t dsp_opcode_type(Uint16 prev_pc, Uint16 pc)
{
       const char *dummy;
       Uint32 opcode;

       /* 24-bit instruction opcode */
       opcode = DSP_ReadMemory(prev_pc, 'P', &dummy) & 0xFFFFFF;

       /* subroutine returns */
       if (opcode == 0xC) {    /* (just) RTS */
               return CALL_SUBRETURN;
       }
       /* unconditional subroutine calls */
       if ((opcode & 0xFFF000) == 0xD0000 ||   /* JSR   00001101 0000aaaa aaaaaaaa */
           (opcode & 0xFFC0FF) == 0xBC080) {   /* JSR   00001011 11MMMRRR 10000000 */
               return CALL_SUBROUTINE;
       }
       /* conditional subroutine calls */
       if ((opcode & 0xFF0000) == 0xF0000 ||   /* JSCC  00001111 CCCCaaaa aaaaaaaa */
           (opcode & 0xFFC0F0) == 0xBC0A0 ||   /* JSCC  00001011 11MMMRRR 1010CCCC */
           (opcode & 0xFFC0A0) == 0xB4080 ||   /* JSCLR 00001011 01MMMRRR 1S0bbbbb */
           (opcode & 0xFFC0A0) == 0xB0080 ||   /* JSCLR 00001011 00aaaaaa 1S0bbbbb */
           (opcode & 0xFFC0A0) == 0xB8080 ||   /* JSCLR 00001011 10pppppp 1S0bbbbb */
           (opcode & 0xFFC0E0) == 0xBC000 ||   /* JSCLR 00001011 11DDDDDD 000bbbbb */
           (opcode & 0xFFC0A0) == 0xB40A0 ||   /* JSSET 00001011 01MMMRRR 1S1bbbbb */
           (opcode & 0xFFC0A0) == 0xB00A0 ||   /* JSSET 00001011 00aaaaaa 1S1bbbbb */
           (opcode & 0xFFC0A0) == 0xB80A0 ||   /* JSSET 00001011 10pppppp 1S1bbbbb */
           (opcode & 0xFFC0E0) == 0xBC020) {   /* JSSET 00001011 11DDDDDD 001bbbbb */
               /* hopefully fairly safe heuristic:
                * if previously executed instruction
                * was one before current one, no
                * subroutine call was made to next
                * instruction, the condition just
                * wasn't met.
                */
               if (is_prev_instr(prev_pc, pc)) {
                       return CALL_NEXT;
               }
               return CALL_SUBROUTINE;
       }
       /* exception handler returns */
       if (opcode == 0x4) {    /* (just) RTI */
               return CALL_EXCRETURN;
       }

       /* Besides CALL_UNKNOWN, rest isn't used by subroutine call
        * cost collection.  However, it's useful info when debugging
        * code or reading full callgraphs (because optimized code uses
        * also jumps/branches for subroutine calls).
        */

       /* TODO: exception invocation.
        * Could be detected by PC going through low interrupt vector addresses,
        * but fast-calls using JSR/RTS would need separate handling.
        */
       if (0) {        /* TODO */
               return CALL_EXCEPTION;
       }
       /* branches (jumps, and also the REP and DO/ENDO loop instructions) */
       if ((opcode & 0xFFF000) == 0xC0000 ||   /* JMP  00001100 0000aaaa aaaaaaaa */
           (opcode & 0xFFC0FF) == 0xAC080 ||   /* JMP  00001010 11MMMRRR 10000000 */
           (opcode & 0xFF0000) == 0xE0000 ||   /* JCC  00001110 CCCCaaaa aaaaaaaa */
           (opcode & 0xFFC0F0) == 0xAC0A0 ||   /* JCC  00001010 11MMMRRR 1010CCCC */
           (opcode & 0xFFC0A0) == 0xA8080 ||   /* JCLR 00001010 10pppppp 1S0bbbbb */
           (opcode & 0xFFC0A0) == 0xA4080 ||   /* JCLR 00001010 01MMMRRR 1S0bbbbb */
           (opcode & 0xFFC0A0) == 0xA0080 ||   /* JCLR 00001010 00aaaaaa 1S0bbbbb */
           (opcode & 0xFFC0E0) == 0xAC000 ||   /* JCLR 00001010 11dddddd 000bbbbb */
           (opcode & 0xFFC0A0) == 0xA80A0 ||   /* JSET 00001010 10pppppp 1S1bbbbb */
           (opcode & 0xFFC0A0) == 0xA40A0 ||   /* JSET 00001010 01MMMRRR 1S1bbbbb */
           (opcode & 0xFFC0A0) == 0xA00A0 ||   /* JSET 00001010 00aaaaaa 1S1bbbbb */
           (opcode & 0xFFC0E0) == 0xAC020 ||   /* JSET 00001010 11dddddd 001bbbbb */
           (opcode & 0xFF00F0) == 0x600A0 ||   /* REP  00000110 iiiiiiii 1010hhhh */
           (opcode & 0xFFC0FF) == 0x6C020 ||   /* REP  00000110 11dddddd 00100000 */
           (opcode & 0xFFC0BF) == 0x64020 ||   /* REP  00000110 01MMMRRR 0s100000 */
           (opcode & 0xFFC0BF) == 0x60020 ||   /* REP  00000110 00aaaaaa 0s100000 */
           (opcode & 0xFF00F0) == 0x60080 ||   /* DO/ENDO 00000110 iiiiiiii 1000hhhh */
           (opcode & 0xFFC0FF) == 0x6C000 ||   /* DO/ENDO 00000110 11DDDDDD 00000000 */
           (opcode & 0xFFC0BF) == 0x64000 ||   /* DO/ENDO 00000110 01MMMRRR 0S000000 */
           (opcode & 0xFFC0BF) == 0x60000) {   /* DO/ENDO 00000110 00aaaaaa 0S000000 */
               return CALL_BRANCH;
       }
       /* no recognized opcode: plain advance to the following instruction? */
       if (is_prev_instr(prev_pc, pc)) {
               return CALL_NEXT;
       }
       return CALL_UNKNOWN;
}
        !           474: 
        !           475: /**
        !           476:  * If call tracking is enabled (there are symbols), collect
        !           477:  * information about subroutine and other calls, and their costs.
        !           478:  * 
        !           479:  * Like with profile data, caller info checks need to be for previous
        !           480:  * instruction, that's why "pc" argument for this function actually
        !           481:  * needs to be previous PC.
        !           482:  */
        !           483: static void collect_calls(Uint16 pc, counters_t *counters)
        !           484: {
        !           485:        calltype_t flag;
        !           486:        Uint16 prev_pc;
        !           487:        Uint32 caller_pc;
        !           488:        int idx;
        !           489: 
        !           490:        prev_pc = dsp_callinfo.prev_pc;
        !           491:        dsp_callinfo.prev_pc = pc;
        !           492:        caller_pc = PC_UNDEFINED;
        !           493: 
        !           494:        /* address is return address for last subroutine call? */
        !           495:        if (unlikely(pc == dsp_callinfo.return_pc) && likely(dsp_callinfo.depth)) {
        !           496: 
        !           497:                flag = dsp_opcode_type(prev_pc, pc);
        !           498:                /* return address is entered either by subroutine return,
        !           499:                 * or by returning from exception that interrupted
        !           500:                 * the instruction at return address.
        !           501:                 */
        !           502:                if (likely(flag == CALL_SUBRETURN || flag == CALL_EXCRETURN)) {
        !           503:                        caller_pc = Profile_CallEnd(&dsp_callinfo, counters);
        !           504:                }
        !           505:        }
        !           506: 
        !           507:        /* address is one which we're tracking? */
        !           508:        idx = Symbols_GetDspAddressIndex(pc);
        !           509:        if (unlikely(idx >= 0)) {
        !           510: 
        !           511:                flag = dsp_opcode_type(prev_pc, pc);
        !           512:                if (flag == CALL_SUBROUTINE) {
        !           513:                        dsp_callinfo.return_pc = DSP_GetNextPC(prev_pc);  /* slow! */
        !           514:                } else if (caller_pc != PC_UNDEFINED) {
        !           515:                        /* returned from function, change return
        !           516:                         * instruction address to address of
        !           517:                         * what did the returned call.
        !           518:                         */
        !           519:                        prev_pc = caller_pc;
        !           520:                        assert(is_prev_instr(prev_pc, pc));
        !           521:                        flag = CALL_NEXT;
        !           522:                }
        !           523:                Profile_CallStart(idx, &dsp_callinfo, prev_pc, flag, pc, counters);
        !           524: 
        !           525:        }
        !           526: }
        !           527: 
        !           528: /**
        !           529:  * Update DSP cycle and count statistics for PC address.
        !           530:  *
        !           531:  * This is called after instruction is executed and PC points
        !           532:  * to next instruction i.e. info is for previous PC address.
        !           533:  */
        !           534: void Profile_DspUpdate(void)
        !           535: {
        !           536:        dsp_profile_item_t *prev;
        !           537:        Uint16 pc, prev_pc, cycles;
        !           538:        counters_t *counters;
        !           539: 
        !           540:        prev_pc = dsp_profile.prev_pc;
        !           541:        dsp_profile.prev_pc = pc = DSP_GetPC();
        !           542:        prev = dsp_profile.data + prev_pc;
        !           543: 
        !           544:        if (likely(prev->count < MAX_DSP_PROFILE_VALUE)) {
        !           545:                prev->count++;
        !           546:        }
        !           547: 
        !           548:        cycles = DSP_GetInstrCycles();
        !           549:        if (likely(prev->cycles < MAX_DSP_PROFILE_VALUE - cycles)) {
        !           550:                prev->cycles += cycles;
        !           551:        } else {
        !           552:                prev->cycles = MAX_DSP_PROFILE_VALUE;
        !           553:        }
        !           554: 
        !           555:        if (unlikely(cycles < prev->min_cycle)) {
        !           556:                prev->min_cycle = cycles;
        !           557:        }
        !           558:        if (unlikely(cycles > prev->max_cycle)) {
        !           559:                prev->max_cycle = cycles;
        !           560:        }
        !           561: 
        !           562:        counters = &(dsp_profile.ram.counters);
        !           563:        if (dsp_callinfo.sites) {
        !           564:                collect_calls(prev_pc, counters);
        !           565:        }
        !           566:        /* counters are increased after caller info is processed,
        !           567:         * otherwise cost for the instruction calling the callee
        !           568:         * doesn't get accounted to caller (but callee).
        !           569:         */
        !           570:        counters->cycles += cycles;
        !           571:        counters->count++;
        !           572: }
        !           573: 
        !           574: /**
        !           575:  * Helper for collecting DSP profile area statistics.
        !           576:  */
        !           577: static void update_area_item(profile_area_t *area, Uint16 addr, dsp_profile_item_t *item)
        !           578: {
        !           579:        Uint64 cycles = item->cycles;
        !           580:        Uint64 count = item->count;
        !           581:        Uint16 diff;
        !           582: 
        !           583:        if (!count) {
        !           584:                return;
        !           585:        }
        !           586:        if (cycles == MAX_DSP_PROFILE_VALUE) {
        !           587:                area->overflow = true;
        !           588:        }
        !           589:        if (item->max_cycle) {
        !           590:                diff = item->max_cycle - item->min_cycle;
        !           591:        } else {
        !           592:                diff = 0;
        !           593:        }
        !           594: 
        !           595:        area->counters.count += count;
        !           596:        area->counters.cycles += cycles;
        !           597:        area->counters.misses += diff;
        !           598: 
        !           599:        if (addr < area->lowest) {
        !           600:                area->lowest = addr;
        !           601:        }
        !           602:        area->highest = addr;
        !           603: 
        !           604:        area->active++;
        !           605: }
        !           606: 
        !           607: /**
        !           608:  * Stop and process the DSP profiling data; collect stats and
        !           609:  * prepare for more optimal sorting.
        !           610:  */
        !           611: void Profile_DspStop(void)
        !           612: {
        !           613:        dsp_profile_item_t *item;
        !           614:        profile_area_t *area;
        !           615:        Uint16 *sort_arr;
        !           616:        Uint32 addr;
        !           617: 
        !           618:        if (dsp_profile.processed || !dsp_profile.enabled) {
        !           619:                return;
        !           620:        }
        !           621: 
        !           622:        Profile_FinalizeCalls(&(dsp_callinfo), &(dsp_profile.ram.counters), Symbols_GetByDspAddress);
        !           623: 
        !           624:        /* find lowest and highest  addresses executed */
        !           625:        area = &dsp_profile.ram;
        !           626:        memset(area, 0, sizeof(profile_area_t));
        !           627:        area->lowest = DSP_PROFILE_ARR_SIZE;
        !           628: 
        !           629:        item = dsp_profile.data;
        !           630:        for (addr = 0; addr < DSP_PROFILE_ARR_SIZE; addr++, item++) {
        !           631:                update_area_item(area, addr, item);
        !           632:        }
        !           633: 
        !           634:        /* allocate address array for sorting */
        !           635:        sort_arr = calloc(dsp_profile.ram.active, sizeof(*sort_arr));
        !           636: 
        !           637:        if (!sort_arr) {
        !           638:                perror("ERROR: allocating DSP profile address data");
        !           639:                free(dsp_profile.data);
        !           640:                dsp_profile.data = NULL;
        !           641:                return;
        !           642:        }
        !           643:        printf("Allocated DSP profile address buffer (%d KB).\n",
        !           644:               (int)sizeof(*sort_arr)*(dsp_profile.ram.active+512)/1024);
        !           645:        dsp_profile.sort_arr = sort_arr;
        !           646: 
        !           647:        /* ...and fill addresses for used instructions... */
        !           648:        area = &dsp_profile.ram;
        !           649:        item = &(dsp_profile.data[area->lowest]);
        !           650:        for (addr = area->lowest; addr <= area->highest; addr++, item++) {
        !           651:                if (item->count) {
        !           652:                        *sort_arr++ = addr;
        !           653:                }
        !           654:        }
        !           655:        //printf("%d/%d/%d\n", area->active, sort_arr-dsp_profile.sort_arr, active);
        !           656: 
        !           657:        Profile_DspShowStats();
        !           658:        dsp_profile.processed = true;
        !           659: }
        !           660: 
        !           661: /**
        !           662:  * Get pointers to DSP profile enabling and disasm address variables
        !           663:  * for updating them (in parser).
        !           664:  */
        !           665: void Profile_DspGetPointers(bool **enabled, Uint32 **disasm_addr)
        !           666: {
        !           667:        *disasm_addr = &dsp_profile.disasm_addr;
        !           668:        *enabled = &dsp_profile.enabled;
        !           669: }
        !           670: 
        !           671: /**
        !           672:  * Get callinfo & symbol search pointers for stack walking.
        !           673:  */
        !           674: void Profile_DspGetCallinfo(callinfo_t **callinfo, const char* (**get_symbol)(Uint32))
        !           675: {
        !           676:        *callinfo = &(dsp_callinfo);
        !           677:        *get_symbol = Symbols_GetByDspAddress;
        !           678: }

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.