|
|
1.1 ! root 1: /* ! 2: * Hatari - profiledsp.c ! 3: * ! 4: * Copyright (C) 2010-2013 by Eero Tamminen ! 5: * ! 6: * This file is distributed under the GNU General Public License, version 2 ! 7: * or at your option any later version. Read the file gpl.txt for details. ! 8: * ! 9: * profiledsp.c - functions for profiling DSP and showing the results. ! 10: */ ! 11: const char Profiledsp_fileid[] = "Hatari profiledsp.c : " __DATE__ " " __TIME__; ! 12: ! 13: #include <stdio.h> ! 14: #include <inttypes.h> ! 15: #include <assert.h> ! 16: #include "main.h" ! 17: #include "configuration.h" ! 18: #include "clocks_timings.h" ! 19: #include "dsp.h" ! 20: #include "profile.h" ! 21: #include "profile_priv.h" ! 22: #include "symbols.h" ! 23: ! 24: static callinfo_t dsp_callinfo; ! 25: ! 26: #define DSP_PROFILE_ARR_SIZE 0x10000 ! 27: #define MAX_DSP_PROFILE_VALUE 0xFFFFFFFFFFFFFFFFLL ! 28: ! 29: typedef struct { ! 30: Uint64 count; /* how many times this address is used */ ! 31: Uint64 cycles; /* how many DSP cycles was taken at this address */ ! 32: Uint16 min_cycle; ! 33: Uint16 max_cycle; ! 34: } dsp_profile_item_t; ! 35: ! 36: static struct { ! 37: dsp_profile_item_t *data; /* profile data */ ! 38: profile_area_t ram; /* statistics for whole memory */ ! 39: Uint16 *sort_arr; /* data indexes used for sorting */ ! 40: Uint16 prev_pc; /* previous PC for which the cycles are for */ ! 41: Uint32 disasm_addr; /* 'dspaddresses' command start address */ ! 42: bool processed; /* true when data is already processed */ ! 43: bool enabled; /* true when profiling enabled */ ! 44: } dsp_profile; ! 45: ! 46: ! 47: /* ------------------ DSP profile results ----------------- */ ! 48: ! 49: /** ! 50: * Get DSP cycles, count and count percentage for given address. ! 51: * Return true if data was available and non-zero, false otherwise. ! 52: */ ! 53: bool Profile_DspAddressData(Uint16 addr, float *percentage, Uint64 *count, Uint64 *cycles, Uint16 *cycle_diff) ! 54: { ! 55: dsp_profile_item_t *item; ! 56: if (!dsp_profile.data) { ! 57: return false; ! 58: } ! 59: item = dsp_profile.data + addr; ! 60: ! 61: *cycles = item->cycles; ! 62: *count = item->count; ! 63: if (item->max_cycle) { ! 64: *cycle_diff = item->max_cycle - item->min_cycle; ! 65: } else { ! 66: *cycle_diff = 0; ! 67: } ! 68: if (dsp_profile.ram.counters.count) { ! 69: *percentage = 100.0*(*count)/dsp_profile.ram.counters.count; ! 70: } else { ! 71: *percentage = 0.0; ! 72: } ! 73: return (*count > 0); ! 74: } ! 75: ! 76: /** ! 77: * show DSP specific profile statistics. ! 78: */ ! 79: void Profile_DspShowStats(void) ! 80: { ! 81: profile_area_t *area = &dsp_profile.ram; ! 82: fprintf(stderr, "DSP profile statistics (0x0-0xFFFF):\n"); ! 83: if (!area->active) { ! 84: fprintf(stderr, "- no activity\n"); ! 85: return; ! 86: } ! 87: fprintf(stderr, "- active address range:\n 0x%04x-0x%04x\n", ! 88: area->lowest, area->highest); ! 89: fprintf(stderr, "- active instruction addresses:\n %d\n", ! 90: area->active); ! 91: fprintf(stderr, "- executed instructions:\n %"PRIu64"\n", ! 92: area->counters.count); ! 93: /* indicates either instruction(s) that address different memory areas ! 94: * (they can have different access costs), or more significantly, ! 95: * DSP code that has changed during profiling. ! 96: */ ! 97: fprintf(stderr, "- sum of per instruction cycle changes\n" ! 98: " (can indicate code change during profiling):\n %"PRIu64"\n", ! 99: area->counters.misses); ! 100: ! 101: fprintf(stderr, "- used cycles:\n %"PRIu64"\n", ! 102: area->counters.cycles); ! 103: if (area->overflow) { ! 104: fprintf(stderr, " *** COUNTERS OVERFLOW! ***\n"); ! 105: } ! 106: fprintf(stderr, "\n= %.5fs\n", (double)(area->counters.cycles) / MachineClocks.DSP_Freq); ! 107: } ! 108: ! 109: /** ! 110: * Show DSP instructions which execution was profiled, in the address order, ! 111: * starting from the given address. Return next disassembly address. ! 112: */ ! 113: Uint16 Profile_DspShowAddresses(Uint32 addr, Uint32 upper, FILE *out) ! 114: { ! 115: int show, shown, active; ! 116: dsp_profile_item_t *data; ! 117: Uint16 nextpc; ! 118: Uint32 end; ! 119: const char *symbol; ! 120: ! 121: data = dsp_profile.data; ! 122: if (!data) { ! 123: fprintf(stderr, "ERROR: no DSP profiling data available!\n"); ! 124: return 0; ! 125: } ! 126: ! 127: end = DSP_PROFILE_ARR_SIZE; ! 128: active = dsp_profile.ram.active; ! 129: show = ConfigureParams.Debugger.nDisasmLines; ! 130: if (upper) { ! 131: if (upper < end) { ! 132: end = upper; ! 133: } ! 134: show = active; ! 135: } else { ! 136: show = ConfigureParams.Debugger.nDisasmLines; ! 137: if (!show || show > active) { ! 138: show = active; ! 139: } ! 140: } ! 141: ! 142: fputs("# disassembly with profile data: <instructions percentage>% (<sum of instructions>, <sum of cycles>, <max cycle difference>)\n", out); ! 143: ! 144: nextpc = 0; ! 145: for (shown = 0; shown < show && addr < end; addr++) { ! 146: if (!data[addr].count) { ! 147: continue; ! 148: } ! 149: if (addr != nextpc && nextpc) { ! 150: fputs("[...]\n", out); ! 151: } ! 152: symbol = Symbols_GetByDspAddress(addr); ! 153: if (symbol) { ! 154: fprintf(out, "%s:\n", symbol); ! 155: } ! 156: nextpc = DSP_DisasmAddress(out, addr, addr); ! 157: shown++; ! 158: } ! 159: printf("Disassembled %d (of active %d) DSP addresses.\n", shown, active); ! 160: return nextpc; ! 161: } ! 162: ! 163: /** ! 164: * compare function for qsort() to sort DSP profile data by descdending ! 165: * address cycles counts. ! 166: */ ! 167: static int cmp_dsp_cycles(const void *p1, const void *p2) ! 168: { ! 169: Uint64 count1 = dsp_profile.data[*(const Uint16*)p1].cycles; ! 170: Uint64 count2 = dsp_profile.data[*(const Uint16*)p2].cycles; ! 171: if (count1 > count2) { ! 172: return -1; ! 173: } ! 174: if (count1 < count2) { ! 175: return 1; ! 176: } ! 177: return 0; ! 178: } ! 179: ! 180: /** ! 181: * Sort DSP profile data addresses by cycle counts and show the results. ! 182: */ ! 183: void Profile_DspShowCycles(int show) ! 184: { ! 185: int active; ! 186: Uint16 *sort_arr, *end, addr; ! 187: dsp_profile_item_t *data = dsp_profile.data; ! 188: float percentage; ! 189: Uint64 count; ! 190: ! 191: if (!data) { ! 192: fprintf(stderr, "ERROR: no DSP profiling data available!\n"); ! 193: return; ! 194: } ! 195: ! 196: active = dsp_profile.ram.active; ! 197: sort_arr = dsp_profile.sort_arr; ! 198: qsort(sort_arr, active, sizeof(*sort_arr), cmp_dsp_cycles); ! 199: ! 200: printf("addr:\tcycles:\n"); ! 201: show = (show < active ? show : active); ! 202: for (end = sort_arr + show; sort_arr < end; sort_arr++) { ! 203: addr = *sort_arr; ! 204: count = data[addr].cycles; ! 205: percentage = 100.0*count/dsp_profile.ram.counters.cycles; ! 206: printf("0x%04x\t%5.2f%%\t%"PRIu64"%s\n", addr, percentage, count, ! 207: count == MAX_DSP_PROFILE_VALUE ? " (OVERFLOW)" : ""); ! 208: } ! 209: printf("%d DSP addresses listed.\n", show); ! 210: } ! 211: ! 212: ! 213: /** ! 214: * compare function for qsort() to sort DSP profile data by descdending ! 215: * address access counts. ! 216: */ ! 217: static int cmp_dsp_count(const void *p1, const void *p2) ! 218: { ! 219: Uint64 count1 = dsp_profile.data[*(const Uint16*)p1].count; ! 220: Uint64 count2 = dsp_profile.data[*(const Uint16*)p2].count; ! 221: if (count1 > count2) { ! 222: return -1; ! 223: } ! 224: if (count1 < count2) { ! 225: return 1; ! 226: } ! 227: return 0; ! 228: } ! 229: ! 230: /** ! 231: * Sort DSP profile data addresses by call counts and show the results. ! 232: * If symbols are requested and symbols are loaded, show (only) addresses ! 233: * matching a symbol. ! 234: */ ! 235: void Profile_DspShowCounts(int show, bool only_symbols) ! 236: { ! 237: dsp_profile_item_t *data = dsp_profile.data; ! 238: int symbols, matched, active; ! 239: Uint16 *sort_arr, *end, addr; ! 240: const char *name; ! 241: float percentage; ! 242: Uint64 count; ! 243: ! 244: if (!data) { ! 245: fprintf(stderr, "ERROR: no DSP profiling data available!\n"); ! 246: return; ! 247: } ! 248: active = dsp_profile.ram.active; ! 249: show = (show < active ? show : active); ! 250: ! 251: sort_arr = dsp_profile.sort_arr; ! 252: qsort(sort_arr, active, sizeof(*sort_arr), cmp_dsp_count); ! 253: ! 254: if (!only_symbols) { ! 255: printf("addr:\tcount:\n"); ! 256: for (end = sort_arr + show; sort_arr < end; sort_arr++) { ! 257: addr = *sort_arr; ! 258: count = data[addr].count; ! 259: percentage = 100.0*count/dsp_profile.ram.counters.count; ! 260: printf("0x%04x\t%5.2f%%\t%"PRIu64"%s\n", ! 261: addr, percentage, count, ! 262: count == MAX_DSP_PROFILE_VALUE ? " (OVERFLOW)" : ""); ! 263: } ! 264: printf("%d DSP addresses listed.\n", show); ! 265: return; ! 266: } ! 267: ! 268: symbols = Symbols_DspCount(); ! 269: if (!symbols) { ! 270: fprintf(stderr, "ERROR: no DSP symbols loaded!\n"); ! 271: return; ! 272: } ! 273: matched = 0; ! 274: ! 275: printf("addr:\tcount:\t\tsymbol:\n"); ! 276: for (end = sort_arr + active; sort_arr < end; sort_arr++) { ! 277: ! 278: addr = *sort_arr; ! 279: name = Symbols_GetByDspAddress(addr); ! 280: if (!name) { ! 281: continue; ! 282: } ! 283: count = data[addr].count; ! 284: percentage = 100.0*count/dsp_profile.ram.counters.count; ! 285: printf("0x%04x\t%.2f%%\t%"PRIu64"\t%s%s\n", ! 286: addr, percentage, count, name, ! 287: count == MAX_DSP_PROFILE_VALUE ? " (OVERFLOW)" : ""); ! 288: ! 289: matched++; ! 290: if (matched >= show || matched >= symbols) { ! 291: break; ! 292: } ! 293: } ! 294: printf("%d DSP symbols listed.\n", matched); ! 295: } ! 296: ! 297: ! 298: static const char * addr2name(Uint32 addr, Uint64 *total) ! 299: { ! 300: *total = dsp_profile.data[addr].count; ! 301: return Symbols_GetByDspAddress(addr); ! 302: } ! 303: ! 304: /** ! 305: * Output DSP callers info to given file. ! 306: */ ! 307: void Profile_DspShowCallers(FILE *fp) ! 308: { ! 309: Profile_ShowCallers(fp, dsp_callinfo.sites, dsp_callinfo.site, addr2name); ! 310: } ! 311: ! 312: /** ! 313: * Save DSP profile information to given file. ! 314: */ ! 315: void Profile_DspSave(FILE *out) ! 316: { ! 317: /* Comma separated descriptions for the profile disassembly data fields. ! 318: * Instructions and cycles need to be first two fields! ! 319: */ ! 320: fputs("Field names:\tExecuted instructions, Used cycles, Largest cycle differences (= code changes during profiling)\n", out); ! 321: /* (Python) pegexp that matches address and all describled fields from disassembly: ! 322: * <space>:<address> <opcodes> (<instr cycles>) <instr> <count>% (<count>, <cycles>) ! 323: * p:0202 0aa980 000200 (07 cyc) jclr #0,x:$ffe9,p:$0200 0.00% (6, 42) ! 324: */ ! 325: fputs("Field regexp:\t^p:([0-9a-f]+) .*% \\((.*)\\)$\n", out); ! 326: Profile_DspShowAddresses(0, DSP_PROFILE_ARR_SIZE, out); ! 327: Profile_DspShowCallers(out); ! 328: } ! 329: ! 330: /* ------------------ DSP profile control ----------------- */ ! 331: ! 332: /** ! 333: * Initialize DSP profiling when necessary. Return true if profiling. ! 334: */ ! 335: bool Profile_DspStart(void) ! 336: { ! 337: dsp_profile_item_t *item; ! 338: int i; ! 339: ! 340: Profile_FreeCallinfo(&(dsp_callinfo)); ! 341: if (dsp_profile.sort_arr) { ! 342: /* remove previous results */ ! 343: free(dsp_profile.sort_arr); ! 344: free(dsp_profile.data); ! 345: dsp_profile.sort_arr = NULL; ! 346: dsp_profile.data = NULL; ! 347: printf("Freed previous DSP profile buffers.\n"); ! 348: } ! 349: if (!dsp_profile.enabled) { ! 350: return false; ! 351: } ! 352: /* zero everything */ ! 353: memset(&dsp_profile, 0, sizeof(dsp_profile)); ! 354: ! 355: dsp_profile.data = calloc(DSP_PROFILE_ARR_SIZE, sizeof(*dsp_profile.data)); ! 356: if (!dsp_profile.data) { ! 357: perror("ERROR, new DSP profile buffer alloc failed"); ! 358: return false; ! 359: } ! 360: printf("Allocated DSP profile buffer (%d KB).\n", ! 361: (int)sizeof(*dsp_profile.data)*DSP_PROFILE_ARR_SIZE/1024); ! 362: ! 363: Profile_AllocCallinfo(&(dsp_callinfo), Symbols_DspCount(), "DSP"); ! 364: ! 365: item = dsp_profile.data; ! 366: for (i = 0; i < DSP_PROFILE_ARR_SIZE; i++, item++) { ! 367: item->min_cycle = 0xFFFF; ! 368: } ! 369: dsp_profile.prev_pc = DSP_GetPC(); ! 370: ! 371: dsp_profile.disasm_addr = 0; ! 372: dsp_profile.processed = false; ! 373: dsp_profile.enabled = true; ! 374: return dsp_profile.enabled; ! 375: } ! 376: ! 377: /* return true if pc is next instruction for previous pc */ ! 378: static bool is_prev_instr(Uint16 prev_pc, Uint16 pc) ! 379: { ! 380: /* just moved to next instruction (1-2 words)? */ ! 381: if (prev_pc < pc && (pc - prev_pc) <= 4) { ! 382: return true; ! 383: } ! 384: return false; ! 385: } ! 386: ! 387: /* return branch type based on caller instruction type */ ! 388: static calltype_t dsp_opcode_type(Uint16 prev_pc, Uint16 pc) ! 389: { ! 390: const char *dummy; ! 391: Uint32 opcode; ! 392: ! 393: /* 24-bit instruction opcode */ ! 394: opcode = DSP_ReadMemory(prev_pc, 'P', &dummy) & 0xFFFFFF; ! 395: ! 396: /* subroutine returns */ ! 397: if (opcode == 0xC) { /* (just) RTS */ ! 398: return CALL_SUBRETURN; ! 399: } ! 400: /* unconditional subroutine calls */ ! 401: if ((opcode & 0xFFF000) == 0xD0000 || /* JSR 00001101 0000aaaa aaaaaaaa */ ! 402: (opcode & 0xFFC0FF) == 0xBC080) { /* JSR 00001011 11MMMRRR 10000000 */ ! 403: return CALL_SUBROUTINE; ! 404: } ! 405: /* conditional subroutine calls */ ! 406: if ((opcode & 0xFF0000) == 0xF0000 || /* JSCC 00001111 CCCCaaaa aaaaaaaa */ ! 407: (opcode & 0xFFC0F0) == 0xBC0A0 || /* JSCC 00001011 11MMMRRR 1010CCCC */ ! 408: (opcode & 0xFFC0A0) == 0xB4080 || /* JSCLR 00001011 01MMMRRR 1S0bbbbb */ ! 409: (opcode & 0xFFC0A0) == 0xB0080 || /* JSCLR 00001011 00aaaaaa 1S0bbbbb */ ! 410: (opcode & 0xFFC0A0) == 0xB8080 || /* JSCLR 00001011 10pppppp 1S0bbbbb */ ! 411: (opcode & 0xFFC0E0) == 0xBC000 || /* JSCLR 00001011 11DDDDDD 000bbbbb */ ! 412: (opcode & 0xFFC0A0) == 0xB40A0 || /* JSSET 00001011 01MMMRRR 1S1bbbbb */ ! 413: (opcode & 0xFFC0A0) == 0xB00A0 || /* JSSET 00001011 00aaaaaa 1S1bbbbb */ ! 414: (opcode & 0xFFC0A0) == 0xB80A0 || /* JSSET 00001011 10pppppp 1S1bbbbb */ ! 415: (opcode & 0xFFC0E0) == 0xBC020) { /* JSSET 00001011 11DDDDDD 001bbbbb */ ! 416: /* hopefully fairly safe heuristic: ! 417: * if previously executed instruction ! 418: * was one before current one, no ! 419: * subroutine call was made to next ! 420: * instruction, the condition just ! 421: * wasn't met. ! 422: */ ! 423: if (is_prev_instr(prev_pc, pc)) { ! 424: return CALL_NEXT; ! 425: } ! 426: return CALL_SUBROUTINE; ! 427: } ! 428: /* exception handler returns */ ! 429: if (opcode == 0x4) { /* (just) RTI */ ! 430: return CALL_EXCRETURN; ! 431: } ! 432: ! 433: /* Besides CALL_UNKNOWN, rest isn't used by subroutine call ! 434: * cost collection. However, it's useful info when debugging ! 435: * code or reading full callgraphs (because optimized code uses ! 436: * also jumps/branches for subroutine calls). ! 437: */ ! 438: ! 439: /* TODO: exception invocation. ! 440: * Could be detected by PC going through low interrupt vector adresses, ! 441: * but fast-calls using JSR/RTS would need separate handling. ! 442: */ ! 443: if (0) { /* TODO */ ! 444: return CALL_EXCEPTION; ! 445: } ! 446: /* branches */ ! 447: if ((opcode & 0xFFF000) == 0xC0000 || /* JMP 00001100 0000aaaa aaaaaaaa */ ! 448: (opcode & 0xFFC0FF) == 0xAC080 || /* JMP 00001010 11MMMRRR 10000000 */ ! 449: (opcode & 0xFF0000) == 0xE0000 || /* JCC 00001110 CCCCaaaa aaaaaaaa */ ! 450: (opcode & 0xFFC0F0) == 0xAC0A0 || /* JCC 00001010 11MMMRRR 1010CCCC */ ! 451: (opcode & 0xFFC0A0) == 0xA8080 || /* JCLR 00001010 10pppppp 1S0bbbbb */ ! 452: (opcode & 0xFFC0A0) == 0xA4080 || /* JCLR 00001010 01MMMRRR 1S0bbbbb */ ! 453: (opcode & 0xFFC0A0) == 0xA0080 || /* JCLR 00001010 00aaaaaa 1S0bbbbb */ ! 454: (opcode & 0xFFC0E0) == 0xAC000 || /* JCLR 00001010 11dddddd 000bbbbb */ ! 455: (opcode & 0xFFC0A0) == 0xA80A0 || /* JSET 00001010 10pppppp 1S1bbbbb */ ! 456: (opcode & 0xFFC0A0) == 0xA40A0 || /* JSET 00001010 01MMMRRR 1S1bbbbb */ ! 457: (opcode & 0xFFC0A0) == 0xA00A0 || /* JSET 00001010 00aaaaaa 1S1bbbbb */ ! 458: (opcode & 0xFFC0E0) == 0xAC020 || /* JSET 00001010 11dddddd 001bbbbb */ ! 459: (opcode & 0xFF00F0) == 0x600A0 || /* REP 00000110 iiiiiiii 1010hhhh */ ! 460: (opcode & 0xFFC0FF) == 0x6C020 || /* REP 00000110 11dddddd 00100000 */ ! 461: (opcode & 0xFFC0BF) == 0x64020 || /* REP 00000110 01MMMRRR 0s100000 */ ! 462: (opcode & 0xFFC0BF) == 0x60020 || /* REP 00000110 00aaaaaa 0s100000 */ ! 463: (opcode & 0xFF00F0) == 0x60080 || /* DO/ENDO 00000110 iiiiiiii 1000hhhh */ ! 464: (opcode & 0xFFC0FF) == 0x6C000 || /* DO/ENDO 00000110 11DDDDDD 00000000 */ ! 465: (opcode & 0xFFC0BF) == 0x64000 || /* DO/ENDO 00000110 01MMMRRR 0S000000 */ ! 466: (opcode & 0xFFC0BF) == 0x60000) { /* DO/ENDO 00000110 00aaaaaa 0S000000 */ ! 467: return CALL_BRANCH; ! 468: } ! 469: if (is_prev_instr(prev_pc, pc)) { ! 470: return CALL_NEXT; ! 471: } ! 472: return CALL_UNKNOWN; ! 473: } ! 474: ! 475: /** ! 476: * If call tracking is enabled (there are symbols), collect ! 477: * information about subroutine and other calls, and their costs. ! 478: * ! 479: * Like with profile data, caller info checks need to be for previous ! 480: * instruction, that's why "pc" argument for this function actually ! 481: * needs to be previous PC. ! 482: */ ! 483: static void collect_calls(Uint16 pc, counters_t *counters) ! 484: { ! 485: calltype_t flag; ! 486: Uint16 prev_pc; ! 487: Uint32 caller_pc; ! 488: int idx; ! 489: ! 490: prev_pc = dsp_callinfo.prev_pc; ! 491: dsp_callinfo.prev_pc = pc; ! 492: caller_pc = PC_UNDEFINED; ! 493: ! 494: /* address is return address for last subroutine call? */ ! 495: if (unlikely(pc == dsp_callinfo.return_pc) && likely(dsp_callinfo.depth)) { ! 496: ! 497: flag = dsp_opcode_type(prev_pc, pc); ! 498: /* return address is entered either by subroutine return, ! 499: * or by returning from exception that interrupted ! 500: * the instruction at return address. ! 501: */ ! 502: if (likely(flag == CALL_SUBRETURN || flag == CALL_EXCRETURN)) { ! 503: caller_pc = Profile_CallEnd(&dsp_callinfo, counters); ! 504: } ! 505: } ! 506: ! 507: /* address is one which we're tracking? */ ! 508: idx = Symbols_GetDspAddressIndex(pc); ! 509: if (unlikely(idx >= 0)) { ! 510: ! 511: flag = dsp_opcode_type(prev_pc, pc); ! 512: if (flag == CALL_SUBROUTINE) { ! 513: dsp_callinfo.return_pc = DSP_GetNextPC(prev_pc); /* slow! */ ! 514: } else if (caller_pc != PC_UNDEFINED) { ! 515: /* returned from function, change return ! 516: * instruction address to address of ! 517: * what did the returned call. ! 518: */ ! 519: prev_pc = caller_pc; ! 520: assert(is_prev_instr(prev_pc, pc)); ! 521: flag = CALL_NEXT; ! 522: } ! 523: Profile_CallStart(idx, &dsp_callinfo, prev_pc, flag, pc, counters); ! 524: ! 525: } ! 526: } ! 527: ! 528: /** ! 529: * Update DSP cycle and count statistics for PC address. ! 530: * ! 531: * This is called after instruction is executed and PC points ! 532: * to next instruction i.e. info is for previous PC address. ! 533: */ ! 534: void Profile_DspUpdate(void) ! 535: { ! 536: dsp_profile_item_t *prev; ! 537: Uint16 pc, prev_pc, cycles; ! 538: counters_t *counters; ! 539: ! 540: prev_pc = dsp_profile.prev_pc; ! 541: dsp_profile.prev_pc = pc = DSP_GetPC(); ! 542: prev = dsp_profile.data + prev_pc; ! 543: ! 544: if (likely(prev->count < MAX_DSP_PROFILE_VALUE)) { ! 545: prev->count++; ! 546: } ! 547: ! 548: cycles = DSP_GetInstrCycles(); ! 549: if (likely(prev->cycles < MAX_DSP_PROFILE_VALUE - cycles)) { ! 550: prev->cycles += cycles; ! 551: } else { ! 552: prev->cycles = MAX_DSP_PROFILE_VALUE; ! 553: } ! 554: ! 555: if (unlikely(cycles < prev->min_cycle)) { ! 556: prev->min_cycle = cycles; ! 557: } ! 558: if (unlikely(cycles > prev->max_cycle)) { ! 559: prev->max_cycle = cycles; ! 560: } ! 561: ! 562: counters = &(dsp_profile.ram.counters); ! 563: if (dsp_callinfo.sites) { ! 564: collect_calls(prev_pc, counters); ! 565: } ! 566: /* counters are increased after caller info is processed, ! 567: * otherwise cost for the instruction calling the callee ! 568: * doesn't get accounted to caller (but callee). ! 569: */ ! 570: counters->cycles += cycles; ! 571: counters->count++; ! 572: } ! 573: ! 574: /** ! 575: * Helper for collecting DSP profile area statistics. ! 576: */ ! 577: static void update_area_item(profile_area_t *area, Uint16 addr, dsp_profile_item_t *item) ! 578: { ! 579: Uint64 cycles = item->cycles; ! 580: Uint64 count = item->count; ! 581: Uint16 diff; ! 582: ! 583: if (!count) { ! 584: return; ! 585: } ! 586: if (cycles == MAX_DSP_PROFILE_VALUE) { ! 587: area->overflow = true; ! 588: } ! 589: if (item->max_cycle) { ! 590: diff = item->max_cycle - item->min_cycle; ! 591: } else { ! 592: diff = 0; ! 593: } ! 594: ! 595: area->counters.count += count; ! 596: area->counters.cycles += cycles; ! 597: area->counters.misses += diff; ! 598: ! 599: if (addr < area->lowest) { ! 600: area->lowest = addr; ! 601: } ! 602: area->highest = addr; ! 603: ! 604: area->active++; ! 605: } ! 606: ! 607: /** ! 608: * Stop and process the DSP profiling data; collect stats and ! 609: * prepare for more optimal sorting. ! 610: */ ! 611: void Profile_DspStop(void) ! 612: { ! 613: dsp_profile_item_t *item; ! 614: profile_area_t *area; ! 615: Uint16 *sort_arr; ! 616: Uint32 addr; ! 617: ! 618: if (dsp_profile.processed || !dsp_profile.enabled) { ! 619: return; ! 620: } ! 621: ! 622: Profile_FinalizeCalls(&(dsp_callinfo), &(dsp_profile.ram.counters), Symbols_GetByDspAddress); ! 623: ! 624: /* find lowest and highest addresses executed */ ! 625: area = &dsp_profile.ram; ! 626: memset(area, 0, sizeof(profile_area_t)); ! 627: area->lowest = DSP_PROFILE_ARR_SIZE; ! 628: ! 629: item = dsp_profile.data; ! 630: for (addr = 0; addr < DSP_PROFILE_ARR_SIZE; addr++, item++) { ! 631: update_area_item(area, addr, item); ! 632: } ! 633: ! 634: /* allocate address array for sorting */ ! 635: sort_arr = calloc(dsp_profile.ram.active, sizeof(*sort_arr)); ! 636: ! 637: if (!sort_arr) { ! 638: perror("ERROR: allocating DSP profile address data"); ! 639: free(dsp_profile.data); ! 640: dsp_profile.data = NULL; ! 641: return; ! 642: } ! 643: printf("Allocated DSP profile address buffer (%d KB).\n", ! 644: (int)sizeof(*sort_arr)*(dsp_profile.ram.active+512)/1024); ! 645: dsp_profile.sort_arr = sort_arr; ! 646: ! 647: /* ...and fill addresses for used instructions... */ ! 648: area = &dsp_profile.ram; ! 649: item = &(dsp_profile.data[area->lowest]); ! 650: for (addr = area->lowest; addr <= area->highest; addr++, item++) { ! 651: if (item->count) { ! 652: *sort_arr++ = addr; ! 653: } ! 654: } ! 655: //printf("%d/%d/%d\n", area->active, sort_arr-dsp_profile.sort_arr, active); ! 656: ! 657: Profile_DspShowStats(); ! 658: dsp_profile.processed = true; ! 659: } ! 660: ! 661: /** ! 662: * Get pointers to DSP profile enabling and disasm address variables ! 663: * for updating them (in parser). ! 664: */ ! 665: void Profile_DspGetPointers(bool **enabled, Uint32 **disasm_addr) ! 666: { ! 667: *disasm_addr = &dsp_profile.disasm_addr; ! 668: *enabled = &dsp_profile.enabled; ! 669: } ! 670: ! 671: /** ! 672: * Get callinfo & symbol search pointers for stack walking. ! 673: */ ! 674: void Profile_DspGetCallinfo(callinfo_t **callinfo, const char* (**get_symbol)(Uint32)) ! 675: { ! 676: *callinfo = &(dsp_callinfo); ! 677: *get_symbol = Symbols_GetByDspAddress; ! 678: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.