Annotation of hatari/src/debug/profiledsp.c, revision 1.1.1.5

1.1       root        1: /*
                      2:  * Hatari - profiledsp.c
                      3:  * 
1.1.1.3   root        4:  * Copyright (C) 2010-2015 by Eero Tamminen
1.1       root        5:  *
                      6:  * This file is distributed under the GNU General Public License, version 2
                      7:  * or at your option any later version. Read the file gpl.txt for details.
                      8:  *
                      9:  * profiledsp.c - functions for profiling DSP and showing the results.
                     10:  */
                     11: const char Profiledsp_fileid[] = "Hatari profiledsp.c : " __DATE__ " " __TIME__;
                     12: 
                     13: #include <stdio.h>
                     14: #include <inttypes.h>
1.1.1.4   root       15: #include <limits.h>
1.1       root       16: #include <assert.h>
                     17: #include "main.h"
                     18: #include "configuration.h"
                     19: #include "clocks_timings.h"
                     20: #include "dsp.h"
1.1.1.4   root       21: #include "symbols.h"
1.1       root       22: #include "profile.h"
                     23: #include "profile_priv.h"
1.1.1.4   root       24: #include "debug_priv.h"
1.1.1.2   root       25: /* for VBL info */
                     26: #include "screen.h"
                     27: #include "video.h"
1.1       root       28: 
                     29: static callinfo_t dsp_callinfo;
                     30: 
                     31: #define DSP_PROFILE_ARR_SIZE 0x10000
                     32: #define MAX_DSP_PROFILE_VALUE 0xFFFFFFFFFFFFFFFFLL
                     33: 
                     34: typedef struct {
                     35:        Uint64 count;           /* how many times this address is used */
                     36:        Uint64 cycles;          /* how many DSP cycles was taken at this address */
                     37:        Uint16 min_cycle;
                     38:        Uint16 max_cycle;
                     39: } dsp_profile_item_t;
                     40: 
                     41: static struct {
                     42:        dsp_profile_item_t *data; /* profile data */
                     43:        profile_area_t ram;   /* statistics for whole memory */
                     44:        Uint16 *sort_arr;     /* data indexes used for sorting */
                     45:        Uint16 prev_pc;       /* previous PC for which the cycles are for */
1.1.1.2   root       46:        Uint16 loop_start;    /* address of last loop start */
                     47:        Uint16 loop_end;      /* address of last loop end */
                     48:        Uint32 loop_count;    /* how many times it was looped */
1.1       root       49:        Uint32 disasm_addr;   /* 'dspaddresses' command start address */
                     50:        bool processed;       /* true when data is already processed */
                     51:        bool enabled;         /* true when profiling enabled */
                     52: } dsp_profile;
                     53: 
                     54: 
                     55: /* ------------------ DSP profile results ----------------- */
                     56: 
                     57: /**
                     58:  * Get DSP cycles, count and count percentage for given address.
                     59:  * Return true if data was available and non-zero, false otherwise.
                     60:  */
                     61: bool Profile_DspAddressData(Uint16 addr, float *percentage, Uint64 *count, Uint64 *cycles, Uint16 *cycle_diff)
                     62: {
                     63:        dsp_profile_item_t *item;
                     64:        if (!dsp_profile.data) {
                     65:                return false;
                     66:        }
                     67:        item = dsp_profile.data + addr;
                     68: 
                     69:        *cycles = item->cycles;
                     70:        *count = item->count;
                     71:        if (item->max_cycle) {
                     72:                *cycle_diff = item->max_cycle - item->min_cycle;
                     73:        } else {
                     74:                *cycle_diff = 0;
                     75:        }
                     76:        if (dsp_profile.ram.counters.count) {
                     77:                *percentage = 100.0*(*count)/dsp_profile.ram.counters.count;
                     78:        } else {
                     79:                *percentage = 0.0;
                     80:        }
                     81:        return (*count > 0);
                     82: }
                     83: 
                     84: /**
                     85:  * show DSP specific profile statistics.
                     86:  */
                     87: void Profile_DspShowStats(void)
                     88: {
                     89:        profile_area_t *area = &dsp_profile.ram;
                     90:        fprintf(stderr, "DSP profile statistics (0x0-0xFFFF):\n");
                     91:        if (!area->active) {
                     92:                fprintf(stderr, "- no activity\n");
                     93:                return;
                     94:        }
                     95:        fprintf(stderr, "- active address range:\n  0x%04x-0x%04x\n",
                     96:                area->lowest, area->highest);
                     97:        fprintf(stderr, "- active instruction addresses:\n  %d\n",
                     98:                area->active);
                     99:        fprintf(stderr, "- executed instructions:\n  %"PRIu64"\n",
                    100:                area->counters.count);
                    101:        /* indicates either instruction(s) that address different memory areas
                    102:         * (they can have different access costs), or more significantly,
                    103:         * DSP code that has changed during profiling.
                    104:         */
                    105:        fprintf(stderr, "- sum of per instruction cycle changes\n"
                    106:                "  (can indicate code change during profiling):\n  %"PRIu64"\n",
1.1.1.3   root      107:                area->counters.cycles_diffs);
1.1       root      108: 
                    109:        fprintf(stderr, "- used cycles:\n  %"PRIu64"\n",
                    110:                area->counters.cycles);
                    111:        if (area->overflow) {
                    112:                fprintf(stderr, "  *** COUNTERS OVERFLOW! ***\n");
                    113:        }
                    114:        fprintf(stderr, "\n= %.5fs\n", (double)(area->counters.cycles) / MachineClocks.DSP_Freq);
                    115: }
                    116: 
                    117: /**
                    118:  * Show DSP instructions which execution was profiled, in the address order,
                    119:  * starting from the given address.  Return next disassembly address.
                    120:  */
1.1.1.4   root      121: Uint16 Profile_DspShowAddresses(Uint32 addr, Uint32 upper, FILE *out, paging_t use_paging)
1.1       root      122: {
1.1.1.4   root      123:        int show, shown, addrs, active;
1.1       root      124:        dsp_profile_item_t *data;
                    125:        Uint16 nextpc;
                    126:        Uint32 end;
                    127:        const char *symbol;
                    128: 
                    129:        data = dsp_profile.data;
                    130:        if (!data) {
                    131:                fprintf(stderr, "ERROR: no DSP profiling data available!\n");
                    132:                return 0;
                    133:        }
                    134: 
                    135:        end = DSP_PROFILE_ARR_SIZE;
                    136:        active = dsp_profile.ram.active;
                    137:        if (upper) {
                    138:                if (upper < end) {
                    139:                        end = upper;
                    140:                }
1.1.1.5 ! root      141:        }
        !           142:        show = INT_MAX;
        !           143:        if (use_paging == PAGING_ENABLED) {
1.1.1.4   root      144:                show = DebugUI_GetPageLines(ConfigureParams.Debugger.nDisasmLines, 0);
1.1.1.5 ! root      145:                if (!show) {
        !           146:                        show = INT_MAX;
1.1       root      147:                }
                    148:        }
                    149: 
                    150:        fputs("# disassembly with profile data: <instructions percentage>% (<sum of instructions>, <sum of cycles>, <max cycle difference>)\n", out);
1.1.1.4   root      151:        shown = 2; /* first and last printf */
1.1       root      152: 
1.1.1.4   root      153:        addrs = nextpc = 0;
1.1.1.5 ! root      154:        for (; shown < show && addrs < active && addr < end; addr++) {
1.1       root      155:                if (!data[addr].count) {
                    156:                        continue;
                    157:                }
                    158:                if (addr != nextpc && nextpc) {
                    159:                        fputs("[...]\n", out);
1.1.1.4   root      160:                        shown++;
1.1       root      161:                }
1.1.1.4   root      162:                symbol = Symbols_GetByDspAddress(addr, SYMTYPE_TEXT);
1.1       root      163:                if (symbol) {
                    164:                        fprintf(out, "%s:\n", symbol);
1.1.1.4   root      165:                        shown++;
1.1       root      166:                }
                    167:                nextpc = DSP_DisasmAddress(out, addr, addr);
1.1.1.4   root      168:                addrs++;
1.1       root      169:                shown++;
                    170:        }
1.1.1.5 ! root      171:        if (addr < end) {
        !           172:                printf("Disassembled %d (of active %d) DSP addresses.\n", addrs, active);
        !           173:        } else {
        !           174:                printf("Disassembled last %d (of active %d) DSP addresses, wrapping...\n", addrs, active);
        !           175:                nextpc = 0;
        !           176:        }
1.1       root      177:        return nextpc;
                    178: }
                    179: 
                    180: /**
                    181:  * compare function for qsort() to sort DSP profile data by descdending
                    182:  * address cycles counts.
                    183:  */
                    184: static int cmp_dsp_cycles(const void *p1, const void *p2)
                    185: {
                    186:        Uint64 count1 = dsp_profile.data[*(const Uint16*)p1].cycles;
                    187:        Uint64 count2 = dsp_profile.data[*(const Uint16*)p2].cycles;
                    188:        if (count1 > count2) {
                    189:                return -1;
                    190:        }
                    191:        if (count1 < count2) {
                    192:                return 1;
                    193:        }
                    194:        return 0;
                    195: }
                    196: 
                    197: /**
                    198:  * Sort DSP profile data addresses by cycle counts and show the results.
                    199:  */
                    200: void Profile_DspShowCycles(int show)
                    201: {
                    202:        int active;
                    203:        Uint16 *sort_arr, *end, addr;
                    204:        dsp_profile_item_t *data = dsp_profile.data;
                    205:        float percentage;
                    206:        Uint64 count;
                    207: 
                    208:        if (!data) {
                    209:                fprintf(stderr, "ERROR: no DSP profiling data available!\n");
                    210:                return;
                    211:        }
                    212: 
                    213:        active = dsp_profile.ram.active;
                    214:        sort_arr = dsp_profile.sort_arr;
                    215:        qsort(sort_arr, active, sizeof(*sort_arr), cmp_dsp_cycles);
                    216: 
                    217:        printf("addr:\tcycles:\n");
                    218:        show = (show < active ? show : active);
                    219:        for (end = sort_arr + show; sort_arr < end; sort_arr++) {
                    220:                addr = *sort_arr;
                    221:                count = data[addr].cycles;
                    222:                percentage = 100.0*count/dsp_profile.ram.counters.cycles;
                    223:                printf("0x%04x\t%5.2f%%\t%"PRIu64"%s\n", addr, percentage, count,
                    224:                       count == MAX_DSP_PROFILE_VALUE ? " (OVERFLOW)" : "");
                    225:        }
                    226:        printf("%d DSP addresses listed.\n", show);
                    227: }
                    228: 
                    229: 
                    230: /**
                    231:  * compare function for qsort() to sort DSP profile data by descdending
                    232:  * address access counts.
                    233:  */
                    234: static int cmp_dsp_count(const void *p1, const void *p2)
                    235: {
                    236:        Uint64 count1 = dsp_profile.data[*(const Uint16*)p1].count;
                    237:        Uint64 count2 = dsp_profile.data[*(const Uint16*)p2].count;
                    238:        if (count1 > count2) {
                    239:                return -1;
                    240:        }
                    241:        if (count1 < count2) {
                    242:                return 1;
                    243:        }
                    244:        return 0;
                    245: }
                    246: 
                    247: /**
                    248:  * Sort DSP profile data addresses by call counts and show the results.
                    249:  * If symbols are requested and symbols are loaded, show (only) addresses
                    250:  * matching a symbol.
                    251:  */
                    252: void Profile_DspShowCounts(int show, bool only_symbols)
                    253: {
                    254:        dsp_profile_item_t *data = dsp_profile.data;
                    255:        int symbols, matched, active;
                    256:        Uint16 *sort_arr, *end, addr;
                    257:        const char *name;
                    258:        float percentage;
                    259:        Uint64 count;
                    260: 
                    261:        if (!data) {
                    262:                fprintf(stderr, "ERROR: no DSP profiling data available!\n");
                    263:                return;
                    264:        }
                    265:        active = dsp_profile.ram.active;
                    266:        show = (show < active ? show : active);
                    267: 
                    268:        sort_arr = dsp_profile.sort_arr;
                    269:        qsort(sort_arr, active, sizeof(*sort_arr), cmp_dsp_count);
                    270: 
                    271:        if (!only_symbols) {
                    272:                printf("addr:\tcount:\n");
                    273:                for (end = sort_arr + show; sort_arr < end; sort_arr++) {
                    274:                        addr = *sort_arr;
                    275:                        count = data[addr].count;
                    276:                        percentage = 100.0*count/dsp_profile.ram.counters.count;
                    277:                        printf("0x%04x\t%5.2f%%\t%"PRIu64"%s\n",
                    278:                               addr, percentage, count,
                    279:                               count == MAX_DSP_PROFILE_VALUE ? " (OVERFLOW)" : "");
                    280:                }
                    281:                printf("%d DSP addresses listed.\n", show);
                    282:                return;
                    283:        }
                    284: 
1.1.1.4   root      285:        symbols = Symbols_DspCodeCount();
1.1       root      286:        if (!symbols) {
                    287:                fprintf(stderr, "ERROR: no DSP symbols loaded!\n");
                    288:                return;
                    289:        }
                    290:        matched = 0;    
                    291: 
                    292:        printf("addr:\tcount:\t\tsymbol:\n");
                    293:        for (end = sort_arr + active; sort_arr < end; sort_arr++) {
                    294: 
                    295:                addr = *sort_arr;
1.1.1.4   root      296:                name = Symbols_GetByDspAddress(addr, SYMTYPE_TEXT);
1.1       root      297:                if (!name) {
                    298:                        continue;
                    299:                }
                    300:                count = data[addr].count;
                    301:                percentage = 100.0*count/dsp_profile.ram.counters.count;
                    302:                printf("0x%04x\t%.2f%%\t%"PRIu64"\t%s%s\n",
                    303:                       addr, percentage, count, name,
                    304:                       count == MAX_DSP_PROFILE_VALUE ? " (OVERFLOW)" : "");
                    305: 
                    306:                matched++;
                    307:                if (matched >= show || matched >= symbols) {
                    308:                        break;
                    309:                }
                    310:        }
                    311:        printf("%d DSP symbols listed.\n", matched);
                    312: }
                    313: 
                    314: 
                    315: static const char * addr2name(Uint32 addr, Uint64 *total)
                    316: {
                    317:        *total = dsp_profile.data[addr].count;
1.1.1.4   root      318:        return Symbols_GetByDspAddress(addr, SYMTYPE_TEXT);
1.1       root      319: }
                    320: 
                    321: /**
                    322:  * Output DSP callers info to given file.
                    323:  */
                    324: void Profile_DspShowCallers(FILE *fp)
                    325: {
                    326:        Profile_ShowCallers(fp, dsp_callinfo.sites, dsp_callinfo.site, addr2name);
                    327: }
                    328: 
                    329: /**
                    330:  * Save DSP profile information to given file.
                    331:  */
                    332: void Profile_DspSave(FILE *out)
                    333: {
                    334:        /* Comma separated descriptions for the profile disassembly data fields.
                    335:         * Instructions and cycles need to be first two fields!
                    336:         */
                    337:        fputs("Field names:\tExecuted instructions, Used cycles, Largest cycle differences (= code changes during profiling)\n", out);
                    338:        /* (Python) pegexp that matches address and all describled fields from disassembly:
                    339:         * <space>:<address> <opcodes> (<instr cycles>) <instr> <count>% (<count>, <cycles>)
                    340:         * p:0202  0aa980 000200  (07 cyc)  jclr #0,x:$ffe9,p:$0200  0.00% (6, 42)
                    341:         */
                    342:        fputs("Field regexp:\t^p:([0-9a-f]+) .*% \\((.*)\\)$\n", out);
1.1.1.4   root      343:        Profile_DspShowAddresses(0, DSP_PROFILE_ARR_SIZE, out, PAGING_DISABLED);
1.1       root      344:        Profile_DspShowCallers(out);
                    345: }
                    346: 
                    347: /* ------------------ DSP profile control ----------------- */
                    348: 
                    349: /**
                    350:  * Initialize DSP profiling when necessary.  Return true if profiling.
                    351:  */
                    352: bool Profile_DspStart(void)
                    353: {
                    354:        dsp_profile_item_t *item;
                    355:        int i;
                    356: 
                    357:        Profile_FreeCallinfo(&(dsp_callinfo));
                    358:        if (dsp_profile.sort_arr) {
                    359:                /* remove previous results */
                    360:                free(dsp_profile.sort_arr);
                    361:                free(dsp_profile.data);
                    362:                dsp_profile.sort_arr = NULL;
                    363:                dsp_profile.data = NULL;
                    364:                printf("Freed previous DSP profile buffers.\n");
                    365:        }
                    366:        if (!dsp_profile.enabled) {
                    367:                return false;
                    368:        }
                    369:        /* zero everything */
                    370:        memset(&dsp_profile, 0, sizeof(dsp_profile));
                    371: 
                    372:        dsp_profile.data = calloc(DSP_PROFILE_ARR_SIZE, sizeof(*dsp_profile.data));
                    373:        if (!dsp_profile.data) {
                    374:                perror("ERROR, new DSP profile buffer alloc failed");
                    375:                return false;
                    376:        }
                    377:        printf("Allocated DSP profile buffer (%d KB).\n",
                    378:               (int)sizeof(*dsp_profile.data)*DSP_PROFILE_ARR_SIZE/1024);
                    379: 
1.1.1.4   root      380:        Profile_AllocCallinfo(&(dsp_callinfo), Symbols_DspCodeCount(), "DSP");
1.1       root      381: 
                    382:        item = dsp_profile.data;
                    383:        for (i = 0; i < DSP_PROFILE_ARR_SIZE; i++, item++) {
                    384:                item->min_cycle = 0xFFFF;
                    385:        }
                    386:        dsp_profile.prev_pc = DSP_GetPC();
                    387: 
1.1.1.2   root      388:        dsp_profile.loop_start = 0xFFFF;
                    389:        dsp_profile.loop_end = 0xFFFF;
                    390:        dsp_profile.loop_count = 0;
                    391:        Profile_LoopReset();
                    392: 
1.1       root      393:        dsp_profile.disasm_addr = 0;
                    394:        dsp_profile.processed = false;
                    395:        dsp_profile.enabled = true;
                    396:        return dsp_profile.enabled;
                    397: }
                    398: 
                    399: /* return true if pc is next instruction for previous pc */
                    400: static bool is_prev_instr(Uint16 prev_pc, Uint16 pc)
                    401: {
                    402:        /* just moved to next instruction (1-2 words)? */
                    403:        if (prev_pc < pc && (pc - prev_pc) <= 4) {
                    404:                return true;
                    405:        }
                    406:        return false;
                    407: }
                    408: 
                    409: /* return branch type based on caller instruction type */
                    410: static calltype_t dsp_opcode_type(Uint16 prev_pc, Uint16 pc)
                    411: {
                    412:        const char *dummy;
                    413:        Uint32 opcode;
                    414: 
                    415:        /* 24-bit instruction opcode */
                    416:        opcode = DSP_ReadMemory(prev_pc, 'P', &dummy) & 0xFFFFFF;
                    417: 
                    418:        /* subroutine returns */
                    419:        if (opcode == 0xC) {    /* (just) RTS */
                    420:                return CALL_SUBRETURN;
                    421:        }
                    422:        /* unconditional subroutine calls */
                    423:        if ((opcode & 0xFFF000) == 0xD0000 ||   /* JSR   00001101 0000aaaa aaaaaaaa */
                    424:            (opcode & 0xFFC0FF) == 0xBC080) {   /* JSR   00001011 11MMMRRR 10000000 */
                    425:                return CALL_SUBROUTINE;
                    426:        }
                    427:        /* conditional subroutine calls */
                    428:        if ((opcode & 0xFF0000) == 0xF0000 ||   /* JSCC  00001111 CCCCaaaa aaaaaaaa */
                    429:            (opcode & 0xFFC0F0) == 0xBC0A0 ||   /* JSCC  00001011 11MMMRRR 1010CCCC */
                    430:            (opcode & 0xFFC0A0) == 0xB4080 ||   /* JSCLR 00001011 01MMMRRR 1S0bbbbb */
                    431:            (opcode & 0xFFC0A0) == 0xB0080 ||   /* JSCLR 00001011 00aaaaaa 1S0bbbbb */
                    432:            (opcode & 0xFFC0A0) == 0xB8080 ||   /* JSCLR 00001011 10pppppp 1S0bbbbb */
                    433:            (opcode & 0xFFC0E0) == 0xBC000 ||   /* JSCLR 00001011 11DDDDDD 000bbbbb */
                    434:            (opcode & 0xFFC0A0) == 0xB40A0 ||   /* JSSET 00001011 01MMMRRR 1S1bbbbb */
                    435:            (opcode & 0xFFC0A0) == 0xB00A0 ||   /* JSSET 00001011 00aaaaaa 1S1bbbbb */
                    436:            (opcode & 0xFFC0A0) == 0xB80A0 ||   /* JSSET 00001011 10pppppp 1S1bbbbb */
                    437:            (opcode & 0xFFC0E0) == 0xBC020) {   /* JSSET 00001011 11DDDDDD 001bbbbb */
                    438:                /* hopefully fairly safe heuristic:
                    439:                 * if previously executed instruction
                    440:                 * was one before current one, no
                    441:                 * subroutine call was made to next
                    442:                 * instruction, the condition just
                    443:                 * wasn't met.
                    444:                 */
                    445:                if (is_prev_instr(prev_pc, pc)) {
                    446:                        return CALL_NEXT;
                    447:                }
                    448:                return CALL_SUBROUTINE;
                    449:        }
                    450:        /* exception handler returns */
                    451:        if (opcode == 0x4) {    /* (just) RTI */
                    452:                return CALL_EXCRETURN;
                    453:        }
                    454: 
                    455:        /* Besides CALL_UNKNOWN, rest isn't used by subroutine call
                    456:         * cost collection.  However, it's useful info when debugging
                    457:         * code or reading full callgraphs (because optimized code uses
                    458:         * also jumps/branches for subroutine calls).
                    459:         */
                    460: 
                    461:        /* TODO: exception invocation.
                    462:         * Could be detected by PC going through low interrupt vector adresses,
                    463:         * but fast-calls using JSR/RTS would need separate handling.
                    464:         */
                    465:        if (0) {        /* TODO */
                    466:                return CALL_EXCEPTION;
                    467:        }
                    468:        /* branches */
                    469:        if ((opcode & 0xFFF000) == 0xC0000 ||   /* JMP  00001100 0000aaaa aaaaaaaa */
                    470:            (opcode & 0xFFC0FF) == 0xAC080 ||   /* JMP  00001010 11MMMRRR 10000000 */
                    471:            (opcode & 0xFF0000) == 0xE0000 ||   /* JCC  00001110 CCCCaaaa aaaaaaaa */
                    472:            (opcode & 0xFFC0F0) == 0xAC0A0 ||   /* JCC  00001010 11MMMRRR 1010CCCC */
                    473:            (opcode & 0xFFC0A0) == 0xA8080 ||   /* JCLR 00001010 10pppppp 1S0bbbbb */
                    474:            (opcode & 0xFFC0A0) == 0xA4080 ||   /* JCLR 00001010 01MMMRRR 1S0bbbbb */
                    475:            (opcode & 0xFFC0A0) == 0xA0080 ||   /* JCLR 00001010 00aaaaaa 1S0bbbbb */
                    476:            (opcode & 0xFFC0E0) == 0xAC000 ||   /* JCLR 00001010 11dddddd 000bbbbb */
                    477:            (opcode & 0xFFC0A0) == 0xA80A0 ||   /* JSET 00001010 10pppppp 1S1bbbbb */
                    478:            (opcode & 0xFFC0A0) == 0xA40A0 ||   /* JSET 00001010 01MMMRRR 1S1bbbbb */
                    479:            (opcode & 0xFFC0A0) == 0xA00A0 ||   /* JSET 00001010 00aaaaaa 1S1bbbbb */
                    480:            (opcode & 0xFFC0E0) == 0xAC020 ||   /* JSET 00001010 11dddddd 001bbbbb */
                    481:            (opcode & 0xFF00F0) == 0x600A0 ||   /* REP  00000110 iiiiiiii 1010hhhh */
                    482:            (opcode & 0xFFC0FF) == 0x6C020 ||   /* REP  00000110 11dddddd 00100000 */
                    483:            (opcode & 0xFFC0BF) == 0x64020 ||   /* REP  00000110 01MMMRRR 0s100000 */
                    484:            (opcode & 0xFFC0BF) == 0x60020 ||   /* REP  00000110 00aaaaaa 0s100000 */
                    485:            (opcode & 0xFF00F0) == 0x60080 ||   /* DO/ENDO 00000110 iiiiiiii 1000hhhh */
                    486:            (opcode & 0xFFC0FF) == 0x6C000 ||   /* DO/ENDO 00000110 11DDDDDD 00000000 */
                    487:            (opcode & 0xFFC0BF) == 0x64000 ||   /* DO/ENDO 00000110 01MMMRRR 0S000000 */
                    488:            (opcode & 0xFFC0BF) == 0x60000) {   /* DO/ENDO 00000110 00aaaaaa 0S000000 */
                    489:                return CALL_BRANCH;
                    490:        }
                    491:        if (is_prev_instr(prev_pc, pc)) {
                    492:                return CALL_NEXT;
                    493:        }
                    494:        return CALL_UNKNOWN;
                    495: }
                    496: 
                    497: /**
                    498:  * If call tracking is enabled (there are symbols), collect
                    499:  * information about subroutine and other calls, and their costs.
                    500:  * 
                    501:  * Like with profile data, caller info checks need to be for previous
                    502:  * instruction, that's why "pc" argument for this function actually
                    503:  * needs to be previous PC.
                    504:  */
                    505: static void collect_calls(Uint16 pc, counters_t *counters)
                    506: {
                    507:        calltype_t flag;
                    508:        Uint16 prev_pc;
                    509:        Uint32 caller_pc;
                    510:        int idx;
                    511: 
                    512:        prev_pc = dsp_callinfo.prev_pc;
                    513:        dsp_callinfo.prev_pc = pc;
                    514:        caller_pc = PC_UNDEFINED;
                    515: 
                    516:        /* address is return address for last subroutine call? */
                    517:        if (unlikely(pc == dsp_callinfo.return_pc) && likely(dsp_callinfo.depth)) {
                    518: 
                    519:                flag = dsp_opcode_type(prev_pc, pc);
                    520:                /* return address is entered either by subroutine return,
                    521:                 * or by returning from exception that interrupted
                    522:                 * the instruction at return address.
                    523:                 */
                    524:                if (likely(flag == CALL_SUBRETURN || flag == CALL_EXCRETURN)) {
                    525:                        caller_pc = Profile_CallEnd(&dsp_callinfo, counters);
                    526:                }
                    527:        }
                    528: 
                    529:        /* address is one which we're tracking? */
1.1.1.4   root      530:        idx = Symbols_GetDspCodeIndex(pc);
1.1       root      531:        if (unlikely(idx >= 0)) {
                    532: 
                    533:                flag = dsp_opcode_type(prev_pc, pc);
                    534:                if (flag == CALL_SUBROUTINE) {
                    535:                        dsp_callinfo.return_pc = DSP_GetNextPC(prev_pc);  /* slow! */
                    536:                } else if (caller_pc != PC_UNDEFINED) {
                    537:                        /* returned from function, change return
                    538:                         * instruction address to address of
                    539:                         * what did the returned call.
                    540:                         */
                    541:                        prev_pc = caller_pc;
                    542:                        assert(is_prev_instr(prev_pc, pc));
                    543:                        flag = CALL_NEXT;
                    544:                }
                    545:                Profile_CallStart(idx, &dsp_callinfo, prev_pc, flag, pc, counters);
                    546: 
                    547:        }
                    548: }
                    549: 
                    550: /**
1.1.1.2   root      551:  * log last loop info, if there's suitable data for one
                    552:  */
                    553: static void log_last_loop(void)
                    554: {
                    555:        unsigned len = dsp_profile.loop_end - dsp_profile.loop_start;
                    556:        if (dsp_profile.loop_count > 1 && (len < profile_loop.dsp_limit || !profile_loop.dsp_limit)) {
                    557:                fprintf(profile_loop.fp, "DSP %d 0x%04x %d %d\n", nVBLs,
                    558:                        dsp_profile.loop_start, len, dsp_profile.loop_count);
                    559:                fflush(profile_loop.fp);
                    560:        }
                    561: }
                    562: 
                    563: /**
1.1       root      564:  * Update DSP cycle and count statistics for PC address.
                    565:  *
                    566:  * This is called after instruction is executed and PC points
                    567:  * to next instruction i.e. info is for previous PC address.
                    568:  */
                    569: void Profile_DspUpdate(void)
                    570: {
                    571:        dsp_profile_item_t *prev;
                    572:        Uint16 pc, prev_pc, cycles;
                    573:        counters_t *counters;
                    574: 
                    575:        prev_pc = dsp_profile.prev_pc;
                    576:        dsp_profile.prev_pc = pc = DSP_GetPC();
1.1.1.2   root      577: 
                    578:        if (unlikely(profile_loop.fp)) {
                    579:                if (pc < prev_pc) {
                    580:                        if (pc == dsp_profile.loop_start && prev_pc == dsp_profile.loop_end) {
                    581:                                dsp_profile.loop_count++;
                    582:                        } else {
                    583:                                dsp_profile.loop_start = pc;
                    584:                                dsp_profile.loop_end = prev_pc;
                    585:                                dsp_profile.loop_count = 1;
                    586:                        }
                    587:                } else {
                    588:                        if (pc > dsp_profile.loop_end) {
                    589:                                log_last_loop();
                    590:                                dsp_profile.loop_end = 0xFFFF;
                    591:                                dsp_profile.loop_count = 0;
                    592:                        }
                    593:                }
                    594:        }
                    595: 
1.1       root      596:        prev = dsp_profile.data + prev_pc;
                    597: 
                    598:        if (likely(prev->count < MAX_DSP_PROFILE_VALUE)) {
                    599:                prev->count++;
                    600:        }
                    601: 
                    602:        cycles = DSP_GetInstrCycles();
                    603:        if (likely(prev->cycles < MAX_DSP_PROFILE_VALUE - cycles)) {
                    604:                prev->cycles += cycles;
                    605:        } else {
                    606:                prev->cycles = MAX_DSP_PROFILE_VALUE;
                    607:        }
                    608: 
                    609:        if (unlikely(cycles < prev->min_cycle)) {
                    610:                prev->min_cycle = cycles;
                    611:        }
                    612:        if (unlikely(cycles > prev->max_cycle)) {
                    613:                prev->max_cycle = cycles;
                    614:        }
                    615: 
                    616:        counters = &(dsp_profile.ram.counters);
                    617:        if (dsp_callinfo.sites) {
                    618:                collect_calls(prev_pc, counters);
                    619:        }
                    620:        /* counters are increased after caller info is processed,
                    621:         * otherwise cost for the instruction calling the callee
                    622:         * doesn't get accounted to caller (but callee).
                    623:         */
                    624:        counters->cycles += cycles;
                    625:        counters->count++;
                    626: }
                    627: 
                    628: /**
                    629:  * Helper for collecting DSP profile area statistics.
                    630:  */
                    631: static void update_area_item(profile_area_t *area, Uint16 addr, dsp_profile_item_t *item)
                    632: {
                    633:        Uint64 cycles = item->cycles;
                    634:        Uint64 count = item->count;
                    635:        Uint16 diff;
                    636: 
                    637:        if (!count) {
                    638:                return;
                    639:        }
                    640:        if (cycles == MAX_DSP_PROFILE_VALUE) {
                    641:                area->overflow = true;
                    642:        }
                    643:        if (item->max_cycle) {
                    644:                diff = item->max_cycle - item->min_cycle;
                    645:        } else {
                    646:                diff = 0;
                    647:        }
                    648: 
                    649:        area->counters.count += count;
                    650:        area->counters.cycles += cycles;
1.1.1.3   root      651:        area->counters.cycles_diffs += diff;
1.1       root      652: 
                    653:        if (addr < area->lowest) {
                    654:                area->lowest = addr;
                    655:        }
                    656:        area->highest = addr;
                    657: 
                    658:        area->active++;
                    659: }
                    660: 
                    661: /**
                    662:  * Stop and process the DSP profiling data; collect stats and
                    663:  * prepare for more optimal sorting.
                    664:  */
                    665: void Profile_DspStop(void)
                    666: {
                    667:        dsp_profile_item_t *item;
                    668:        profile_area_t *area;
                    669:        Uint16 *sort_arr;
                    670:        Uint32 addr;
                    671: 
                    672:        if (dsp_profile.processed || !dsp_profile.enabled) {
                    673:                return;
                    674:        }
                    675: 
1.1.1.2   root      676:        log_last_loop();
                    677:        if (profile_loop.fp) {
                    678:                fflush(profile_loop.fp);
                    679:        }
                    680: 
1.1       root      681:        Profile_FinalizeCalls(&(dsp_callinfo), &(dsp_profile.ram.counters), Symbols_GetByDspAddress);
                    682: 
                    683:        /* find lowest and highest  addresses executed */
                    684:        area = &dsp_profile.ram;
                    685:        memset(area, 0, sizeof(profile_area_t));
                    686:        area->lowest = DSP_PROFILE_ARR_SIZE;
                    687: 
                    688:        item = dsp_profile.data;
                    689:        for (addr = 0; addr < DSP_PROFILE_ARR_SIZE; addr++, item++) {
                    690:                update_area_item(area, addr, item);
                    691:        }
                    692: 
                    693:        /* allocate address array for sorting */
                    694:        sort_arr = calloc(dsp_profile.ram.active, sizeof(*sort_arr));
                    695: 
                    696:        if (!sort_arr) {
                    697:                perror("ERROR: allocating DSP profile address data");
                    698:                free(dsp_profile.data);
                    699:                dsp_profile.data = NULL;
                    700:                return;
                    701:        }
                    702:        printf("Allocated DSP profile address buffer (%d KB).\n",
                    703:               (int)sizeof(*sort_arr)*(dsp_profile.ram.active+512)/1024);
                    704:        dsp_profile.sort_arr = sort_arr;
                    705: 
                    706:        /* ...and fill addresses for used instructions... */
                    707:        area = &dsp_profile.ram;
                    708:        item = &(dsp_profile.data[area->lowest]);
                    709:        for (addr = area->lowest; addr <= area->highest; addr++, item++) {
                    710:                if (item->count) {
                    711:                        *sort_arr++ = addr;
                    712:                }
                    713:        }
                    714:        //printf("%d/%d/%d\n", area->active, sort_arr-dsp_profile.sort_arr, active);
                    715: 
                    716:        Profile_DspShowStats();
                    717:        dsp_profile.processed = true;
                    718: }
                    719: 
                    720: /**
                    721:  * Get pointers to DSP profile enabling and disasm address variables
                    722:  * for updating them (in parser).
                    723:  */
                    724: void Profile_DspGetPointers(bool **enabled, Uint32 **disasm_addr)
                    725: {
                    726:        *disasm_addr = &dsp_profile.disasm_addr;
                    727:        *enabled = &dsp_profile.enabled;
                    728: }
                    729: 
                    730: /**
                    731:  * Get callinfo & symbol search pointers for stack walking.
                    732:  */
1.1.1.4   root      733: void Profile_DspGetCallinfo(callinfo_t **callinfo, const char* (**get_symbol)(Uint32, symtype_t))
1.1       root      734: {
                    735:        *callinfo = &(dsp_callinfo);
                    736:        *get_symbol = Symbols_GetByDspAddress;
                    737: }

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.