|
|
1.1 root 1: /*
2: * Hatari - profiledsp.c
3: *
1.1.1.3 root 4: * Copyright (C) 2010-2015 by Eero Tamminen
1.1 root 5: *
6: * This file is distributed under the GNU General Public License, version 2
7: * or at your option any later version. Read the file gpl.txt for details.
8: *
9: * profiledsp.c - functions for profiling DSP and showing the results.
10: */
11: const char Profiledsp_fileid[] = "Hatari profiledsp.c : " __DATE__ " " __TIME__;
12:
13: #include <stdio.h>
14: #include <inttypes.h>
1.1.1.4 root 15: #include <limits.h>
1.1 root 16: #include <assert.h>
17: #include "main.h"
18: #include "configuration.h"
19: #include "clocks_timings.h"
20: #include "dsp.h"
1.1.1.4 root 21: #include "symbols.h"
1.1 root 22: #include "profile.h"
23: #include "profile_priv.h"
1.1.1.4 root 24: #include "debug_priv.h"
1.1.1.2 root 25: /* for VBL info */
26: #include "screen.h"
27: #include "video.h"
1.1 root 28:
/* Call tracking state for the DSP: (re)allocated in Profile_DspStart()
 * and updated from collect_calls() while profiling.
 */
static callinfo_t dsp_callinfo;
30:
/* number of DSP addresses for which profile items are kept */
#define DSP_PROFILE_ARR_SIZE 0x10000
/* Saturation value for the 64-bit per-address counters.  The value only
 * fits an unsigned 64-bit type, so give it an unsigned suffix instead of
 * relying on the implicit promotion of an "LL" hexadecimal constant.
 */
#define MAX_DSP_PROFILE_VALUE 0xFFFFFFFFFFFFFFFFULL
33:
/* Profile counters kept for a single DSP address. */
typedef struct {
	Uint64 count;		/* how many times this address is used */
	Uint64 cycles;		/* how many DSP cycles was taken at this address */
	Uint16 min_cycle;	/* smallest cycle count recorded for one execution (starts at 0xFFFF) */
	Uint16 max_cycle;	/* largest cycle count recorded for one execution */
} dsp_profile_item_t;
40:
/* File-local DSP profiling state. */
static struct {
	dsp_profile_item_t *data; /* profile data, DSP_PROFILE_ARR_SIZE items */
	profile_area_t ram;       /* statistics for whole memory */
	Uint16 *sort_arr;         /* data indexes used for sorting */
	Uint16 prev_pc;           /* previous PC for which the cycles are for */
	Uint16 loop_start;        /* address of last loop start */
	Uint16 loop_end;          /* address of last loop end */
	Uint32 loop_count;        /* how many times it was looped */
	Uint32 disasm_addr;       /* 'dspaddresses' command start address */
	bool processed;           /* true when data is already processed */
	bool enabled;             /* true when profiling enabled */
} dsp_profile;
53:
54:
55: /* ------------------ DSP profile results ----------------- */
56:
57: /**
58: * Get DSP cycles, count and count percentage for given address.
59: * Return true if data was available and non-zero, false otherwise.
60: */
61: bool Profile_DspAddressData(Uint16 addr, float *percentage, Uint64 *count, Uint64 *cycles, Uint16 *cycle_diff)
62: {
63: dsp_profile_item_t *item;
64: if (!dsp_profile.data) {
65: return false;
66: }
67: item = dsp_profile.data + addr;
68:
69: *cycles = item->cycles;
70: *count = item->count;
71: if (item->max_cycle) {
72: *cycle_diff = item->max_cycle - item->min_cycle;
73: } else {
74: *cycle_diff = 0;
75: }
76: if (dsp_profile.ram.counters.count) {
77: *percentage = 100.0*(*count)/dsp_profile.ram.counters.count;
78: } else {
79: *percentage = 0.0;
80: }
81: return (*count > 0);
82: }
83:
84: /**
85: * show DSP specific profile statistics.
86: */
87: void Profile_DspShowStats(void)
88: {
89: profile_area_t *area = &dsp_profile.ram;
90: fprintf(stderr, "DSP profile statistics (0x0-0xFFFF):\n");
91: if (!area->active) {
92: fprintf(stderr, "- no activity\n");
93: return;
94: }
95: fprintf(stderr, "- active address range:\n 0x%04x-0x%04x\n",
96: area->lowest, area->highest);
97: fprintf(stderr, "- active instruction addresses:\n %d\n",
98: area->active);
99: fprintf(stderr, "- executed instructions:\n %"PRIu64"\n",
100: area->counters.count);
101: /* indicates either instruction(s) that address different memory areas
102: * (they can have different access costs), or more significantly,
103: * DSP code that has changed during profiling.
104: */
105: fprintf(stderr, "- sum of per instruction cycle changes\n"
106: " (can indicate code change during profiling):\n %"PRIu64"\n",
1.1.1.3 root 107: area->counters.cycles_diffs);
1.1 root 108:
109: fprintf(stderr, "- used cycles:\n %"PRIu64"\n",
110: area->counters.cycles);
111: if (area->overflow) {
112: fprintf(stderr, " *** COUNTERS OVERFLOW! ***\n");
113: }
114: fprintf(stderr, "\n= %.5fs\n", (double)(area->counters.cycles) / MachineClocks.DSP_Freq);
115: }
116:
117: /**
118: * Show DSP instructions which execution was profiled, in the address order,
119: * starting from the given address. Return next disassembly address.
120: */
1.1.1.4 root 121: Uint16 Profile_DspShowAddresses(Uint32 addr, Uint32 upper, FILE *out, paging_t use_paging)
1.1 root 122: {
1.1.1.4 root 123: int show, shown, addrs, active;
1.1 root 124: dsp_profile_item_t *data;
125: Uint16 nextpc;
126: Uint32 end;
127: const char *symbol;
128:
129: data = dsp_profile.data;
130: if (!data) {
131: fprintf(stderr, "ERROR: no DSP profiling data available!\n");
132: return 0;
133: }
134:
135: end = DSP_PROFILE_ARR_SIZE;
136: active = dsp_profile.ram.active;
137: if (upper) {
138: if (upper < end) {
139: end = upper;
140: }
1.1.1.5 ! root 141: }
! 142: show = INT_MAX;
! 143: if (use_paging == PAGING_ENABLED) {
1.1.1.4 root 144: show = DebugUI_GetPageLines(ConfigureParams.Debugger.nDisasmLines, 0);
1.1.1.5 ! root 145: if (!show) {
! 146: show = INT_MAX;
1.1 root 147: }
148: }
149:
150: fputs("# disassembly with profile data: <instructions percentage>% (<sum of instructions>, <sum of cycles>, <max cycle difference>)\n", out);
1.1.1.4 root 151: shown = 2; /* first and last printf */
1.1 root 152:
1.1.1.4 root 153: addrs = nextpc = 0;
1.1.1.5 ! root 154: for (; shown < show && addrs < active && addr < end; addr++) {
1.1 root 155: if (!data[addr].count) {
156: continue;
157: }
158: if (addr != nextpc && nextpc) {
159: fputs("[...]\n", out);
1.1.1.4 root 160: shown++;
1.1 root 161: }
1.1.1.4 root 162: symbol = Symbols_GetByDspAddress(addr, SYMTYPE_TEXT);
1.1 root 163: if (symbol) {
164: fprintf(out, "%s:\n", symbol);
1.1.1.4 root 165: shown++;
1.1 root 166: }
167: nextpc = DSP_DisasmAddress(out, addr, addr);
1.1.1.4 root 168: addrs++;
1.1 root 169: shown++;
170: }
1.1.1.5 ! root 171: if (addr < end) {
! 172: printf("Disassembled %d (of active %d) DSP addresses.\n", addrs, active);
! 173: } else {
! 174: printf("Disassembled last %d (of active %d) DSP addresses, wrapping...\n", addrs, active);
! 175: nextpc = 0;
! 176: }
1.1 root 177: return nextpc;
178: }
179:
180: /**
181: * compare function for qsort() to sort DSP profile data by descdending
182: * address cycles counts.
183: */
184: static int cmp_dsp_cycles(const void *p1, const void *p2)
185: {
186: Uint64 count1 = dsp_profile.data[*(const Uint16*)p1].cycles;
187: Uint64 count2 = dsp_profile.data[*(const Uint16*)p2].cycles;
188: if (count1 > count2) {
189: return -1;
190: }
191: if (count1 < count2) {
192: return 1;
193: }
194: return 0;
195: }
196:
197: /**
198: * Sort DSP profile data addresses by cycle counts and show the results.
199: */
200: void Profile_DspShowCycles(int show)
201: {
202: int active;
203: Uint16 *sort_arr, *end, addr;
204: dsp_profile_item_t *data = dsp_profile.data;
205: float percentage;
206: Uint64 count;
207:
208: if (!data) {
209: fprintf(stderr, "ERROR: no DSP profiling data available!\n");
210: return;
211: }
212:
213: active = dsp_profile.ram.active;
214: sort_arr = dsp_profile.sort_arr;
215: qsort(sort_arr, active, sizeof(*sort_arr), cmp_dsp_cycles);
216:
217: printf("addr:\tcycles:\n");
218: show = (show < active ? show : active);
219: for (end = sort_arr + show; sort_arr < end; sort_arr++) {
220: addr = *sort_arr;
221: count = data[addr].cycles;
222: percentage = 100.0*count/dsp_profile.ram.counters.cycles;
223: printf("0x%04x\t%5.2f%%\t%"PRIu64"%s\n", addr, percentage, count,
224: count == MAX_DSP_PROFILE_VALUE ? " (OVERFLOW)" : "");
225: }
226: printf("%d DSP addresses listed.\n", show);
227: }
228:
229:
230: /**
231: * compare function for qsort() to sort DSP profile data by descdending
232: * address access counts.
233: */
234: static int cmp_dsp_count(const void *p1, const void *p2)
235: {
236: Uint64 count1 = dsp_profile.data[*(const Uint16*)p1].count;
237: Uint64 count2 = dsp_profile.data[*(const Uint16*)p2].count;
238: if (count1 > count2) {
239: return -1;
240: }
241: if (count1 < count2) {
242: return 1;
243: }
244: return 0;
245: }
246:
247: /**
248: * Sort DSP profile data addresses by call counts and show the results.
249: * If symbols are requested and symbols are loaded, show (only) addresses
250: * matching a symbol.
251: */
252: void Profile_DspShowCounts(int show, bool only_symbols)
253: {
254: dsp_profile_item_t *data = dsp_profile.data;
255: int symbols, matched, active;
256: Uint16 *sort_arr, *end, addr;
257: const char *name;
258: float percentage;
259: Uint64 count;
260:
261: if (!data) {
262: fprintf(stderr, "ERROR: no DSP profiling data available!\n");
263: return;
264: }
265: active = dsp_profile.ram.active;
266: show = (show < active ? show : active);
267:
268: sort_arr = dsp_profile.sort_arr;
269: qsort(sort_arr, active, sizeof(*sort_arr), cmp_dsp_count);
270:
271: if (!only_symbols) {
272: printf("addr:\tcount:\n");
273: for (end = sort_arr + show; sort_arr < end; sort_arr++) {
274: addr = *sort_arr;
275: count = data[addr].count;
276: percentage = 100.0*count/dsp_profile.ram.counters.count;
277: printf("0x%04x\t%5.2f%%\t%"PRIu64"%s\n",
278: addr, percentage, count,
279: count == MAX_DSP_PROFILE_VALUE ? " (OVERFLOW)" : "");
280: }
281: printf("%d DSP addresses listed.\n", show);
282: return;
283: }
284:
1.1.1.4 root 285: symbols = Symbols_DspCodeCount();
1.1 root 286: if (!symbols) {
287: fprintf(stderr, "ERROR: no DSP symbols loaded!\n");
288: return;
289: }
290: matched = 0;
291:
292: printf("addr:\tcount:\t\tsymbol:\n");
293: for (end = sort_arr + active; sort_arr < end; sort_arr++) {
294:
295: addr = *sort_arr;
1.1.1.4 root 296: name = Symbols_GetByDspAddress(addr, SYMTYPE_TEXT);
1.1 root 297: if (!name) {
298: continue;
299: }
300: count = data[addr].count;
301: percentage = 100.0*count/dsp_profile.ram.counters.count;
302: printf("0x%04x\t%.2f%%\t%"PRIu64"\t%s%s\n",
303: addr, percentage, count, name,
304: count == MAX_DSP_PROFILE_VALUE ? " (OVERFLOW)" : "");
305:
306: matched++;
307: if (matched >= show || matched >= symbols) {
308: break;
309: }
310: }
311: printf("%d DSP symbols listed.\n", matched);
312: }
313:
314:
315: static const char * addr2name(Uint32 addr, Uint64 *total)
316: {
317: *total = dsp_profile.data[addr].count;
1.1.1.4 root 318: return Symbols_GetByDspAddress(addr, SYMTYPE_TEXT);
1.1 root 319: }
320:
321: /**
322: * Output DSP callers info to given file.
323: */
324: void Profile_DspShowCallers(FILE *fp)
325: {
326: Profile_ShowCallers(fp, dsp_callinfo.sites, dsp_callinfo.site, addr2name);
327: }
328:
329: /**
330: * Save DSP profile information to given file.
331: */
332: void Profile_DspSave(FILE *out)
333: {
334: /* Comma separated descriptions for the profile disassembly data fields.
335: * Instructions and cycles need to be first two fields!
336: */
337: fputs("Field names:\tExecuted instructions, Used cycles, Largest cycle differences (= code changes during profiling)\n", out);
338: /* (Python) pegexp that matches address and all describled fields from disassembly:
339: * <space>:<address> <opcodes> (<instr cycles>) <instr> <count>% (<count>, <cycles>)
340: * p:0202 0aa980 000200 (07 cyc) jclr #0,x:$ffe9,p:$0200 0.00% (6, 42)
341: */
342: fputs("Field regexp:\t^p:([0-9a-f]+) .*% \\((.*)\\)$\n", out);
1.1.1.4 root 343: Profile_DspShowAddresses(0, DSP_PROFILE_ARR_SIZE, out, PAGING_DISABLED);
1.1 root 344: Profile_DspShowCallers(out);
345: }
346:
347: /* ------------------ DSP profile control ----------------- */
348:
349: /**
350: * Initialize DSP profiling when necessary. Return true if profiling.
351: */
352: bool Profile_DspStart(void)
353: {
354: dsp_profile_item_t *item;
355: int i;
356:
357: Profile_FreeCallinfo(&(dsp_callinfo));
358: if (dsp_profile.sort_arr) {
359: /* remove previous results */
360: free(dsp_profile.sort_arr);
361: free(dsp_profile.data);
362: dsp_profile.sort_arr = NULL;
363: dsp_profile.data = NULL;
364: printf("Freed previous DSP profile buffers.\n");
365: }
366: if (!dsp_profile.enabled) {
367: return false;
368: }
369: /* zero everything */
370: memset(&dsp_profile, 0, sizeof(dsp_profile));
371:
372: dsp_profile.data = calloc(DSP_PROFILE_ARR_SIZE, sizeof(*dsp_profile.data));
373: if (!dsp_profile.data) {
374: perror("ERROR, new DSP profile buffer alloc failed");
375: return false;
376: }
377: printf("Allocated DSP profile buffer (%d KB).\n",
378: (int)sizeof(*dsp_profile.data)*DSP_PROFILE_ARR_SIZE/1024);
379:
1.1.1.4 root 380: Profile_AllocCallinfo(&(dsp_callinfo), Symbols_DspCodeCount(), "DSP");
1.1 root 381:
382: item = dsp_profile.data;
383: for (i = 0; i < DSP_PROFILE_ARR_SIZE; i++, item++) {
384: item->min_cycle = 0xFFFF;
385: }
386: dsp_profile.prev_pc = DSP_GetPC();
387:
1.1.1.2 root 388: dsp_profile.loop_start = 0xFFFF;
389: dsp_profile.loop_end = 0xFFFF;
390: dsp_profile.loop_count = 0;
391: Profile_LoopReset();
392:
1.1 root 393: dsp_profile.disasm_addr = 0;
394: dsp_profile.processed = false;
395: dsp_profile.enabled = true;
396: return dsp_profile.enabled;
397: }
398:
399: /* return true if pc is next instruction for previous pc */
400: static bool is_prev_instr(Uint16 prev_pc, Uint16 pc)
401: {
402: /* just moved to next instruction (1-2 words)? */
403: if (prev_pc < pc && (pc - prev_pc) <= 4) {
404: return true;
405: }
406: return false;
407: }
408:
409: /* return branch type based on caller instruction type */
410: static calltype_t dsp_opcode_type(Uint16 prev_pc, Uint16 pc)
411: {
412: const char *dummy;
413: Uint32 opcode;
414:
415: /* 24-bit instruction opcode */
416: opcode = DSP_ReadMemory(prev_pc, 'P', &dummy) & 0xFFFFFF;
417:
418: /* subroutine returns */
419: if (opcode == 0xC) { /* (just) RTS */
420: return CALL_SUBRETURN;
421: }
422: /* unconditional subroutine calls */
423: if ((opcode & 0xFFF000) == 0xD0000 || /* JSR 00001101 0000aaaa aaaaaaaa */
424: (opcode & 0xFFC0FF) == 0xBC080) { /* JSR 00001011 11MMMRRR 10000000 */
425: return CALL_SUBROUTINE;
426: }
427: /* conditional subroutine calls */
428: if ((opcode & 0xFF0000) == 0xF0000 || /* JSCC 00001111 CCCCaaaa aaaaaaaa */
429: (opcode & 0xFFC0F0) == 0xBC0A0 || /* JSCC 00001011 11MMMRRR 1010CCCC */
430: (opcode & 0xFFC0A0) == 0xB4080 || /* JSCLR 00001011 01MMMRRR 1S0bbbbb */
431: (opcode & 0xFFC0A0) == 0xB0080 || /* JSCLR 00001011 00aaaaaa 1S0bbbbb */
432: (opcode & 0xFFC0A0) == 0xB8080 || /* JSCLR 00001011 10pppppp 1S0bbbbb */
433: (opcode & 0xFFC0E0) == 0xBC000 || /* JSCLR 00001011 11DDDDDD 000bbbbb */
434: (opcode & 0xFFC0A0) == 0xB40A0 || /* JSSET 00001011 01MMMRRR 1S1bbbbb */
435: (opcode & 0xFFC0A0) == 0xB00A0 || /* JSSET 00001011 00aaaaaa 1S1bbbbb */
436: (opcode & 0xFFC0A0) == 0xB80A0 || /* JSSET 00001011 10pppppp 1S1bbbbb */
437: (opcode & 0xFFC0E0) == 0xBC020) { /* JSSET 00001011 11DDDDDD 001bbbbb */
438: /* hopefully fairly safe heuristic:
439: * if previously executed instruction
440: * was one before current one, no
441: * subroutine call was made to next
442: * instruction, the condition just
443: * wasn't met.
444: */
445: if (is_prev_instr(prev_pc, pc)) {
446: return CALL_NEXT;
447: }
448: return CALL_SUBROUTINE;
449: }
450: /* exception handler returns */
451: if (opcode == 0x4) { /* (just) RTI */
452: return CALL_EXCRETURN;
453: }
454:
455: /* Besides CALL_UNKNOWN, rest isn't used by subroutine call
456: * cost collection. However, it's useful info when debugging
457: * code or reading full callgraphs (because optimized code uses
458: * also jumps/branches for subroutine calls).
459: */
460:
461: /* TODO: exception invocation.
462: * Could be detected by PC going through low interrupt vector adresses,
463: * but fast-calls using JSR/RTS would need separate handling.
464: */
465: if (0) { /* TODO */
466: return CALL_EXCEPTION;
467: }
468: /* branches */
469: if ((opcode & 0xFFF000) == 0xC0000 || /* JMP 00001100 0000aaaa aaaaaaaa */
470: (opcode & 0xFFC0FF) == 0xAC080 || /* JMP 00001010 11MMMRRR 10000000 */
471: (opcode & 0xFF0000) == 0xE0000 || /* JCC 00001110 CCCCaaaa aaaaaaaa */
472: (opcode & 0xFFC0F0) == 0xAC0A0 || /* JCC 00001010 11MMMRRR 1010CCCC */
473: (opcode & 0xFFC0A0) == 0xA8080 || /* JCLR 00001010 10pppppp 1S0bbbbb */
474: (opcode & 0xFFC0A0) == 0xA4080 || /* JCLR 00001010 01MMMRRR 1S0bbbbb */
475: (opcode & 0xFFC0A0) == 0xA0080 || /* JCLR 00001010 00aaaaaa 1S0bbbbb */
476: (opcode & 0xFFC0E0) == 0xAC000 || /* JCLR 00001010 11dddddd 000bbbbb */
477: (opcode & 0xFFC0A0) == 0xA80A0 || /* JSET 00001010 10pppppp 1S1bbbbb */
478: (opcode & 0xFFC0A0) == 0xA40A0 || /* JSET 00001010 01MMMRRR 1S1bbbbb */
479: (opcode & 0xFFC0A0) == 0xA00A0 || /* JSET 00001010 00aaaaaa 1S1bbbbb */
480: (opcode & 0xFFC0E0) == 0xAC020 || /* JSET 00001010 11dddddd 001bbbbb */
481: (opcode & 0xFF00F0) == 0x600A0 || /* REP 00000110 iiiiiiii 1010hhhh */
482: (opcode & 0xFFC0FF) == 0x6C020 || /* REP 00000110 11dddddd 00100000 */
483: (opcode & 0xFFC0BF) == 0x64020 || /* REP 00000110 01MMMRRR 0s100000 */
484: (opcode & 0xFFC0BF) == 0x60020 || /* REP 00000110 00aaaaaa 0s100000 */
485: (opcode & 0xFF00F0) == 0x60080 || /* DO/ENDO 00000110 iiiiiiii 1000hhhh */
486: (opcode & 0xFFC0FF) == 0x6C000 || /* DO/ENDO 00000110 11DDDDDD 00000000 */
487: (opcode & 0xFFC0BF) == 0x64000 || /* DO/ENDO 00000110 01MMMRRR 0S000000 */
488: (opcode & 0xFFC0BF) == 0x60000) { /* DO/ENDO 00000110 00aaaaaa 0S000000 */
489: return CALL_BRANCH;
490: }
491: if (is_prev_instr(prev_pc, pc)) {
492: return CALL_NEXT;
493: }
494: return CALL_UNKNOWN;
495: }
496:
497: /**
498: * If call tracking is enabled (there are symbols), collect
499: * information about subroutine and other calls, and their costs.
500: *
501: * Like with profile data, caller info checks need to be for previous
502: * instruction, that's why "pc" argument for this function actually
503: * needs to be previous PC.
504: */
505: static void collect_calls(Uint16 pc, counters_t *counters)
506: {
507: calltype_t flag;
508: Uint16 prev_pc;
509: Uint32 caller_pc;
510: int idx;
511:
512: prev_pc = dsp_callinfo.prev_pc;
513: dsp_callinfo.prev_pc = pc;
514: caller_pc = PC_UNDEFINED;
515:
516: /* address is return address for last subroutine call? */
517: if (unlikely(pc == dsp_callinfo.return_pc) && likely(dsp_callinfo.depth)) {
518:
519: flag = dsp_opcode_type(prev_pc, pc);
520: /* return address is entered either by subroutine return,
521: * or by returning from exception that interrupted
522: * the instruction at return address.
523: */
524: if (likely(flag == CALL_SUBRETURN || flag == CALL_EXCRETURN)) {
525: caller_pc = Profile_CallEnd(&dsp_callinfo, counters);
526: }
527: }
528:
529: /* address is one which we're tracking? */
1.1.1.4 root 530: idx = Symbols_GetDspCodeIndex(pc);
1.1 root 531: if (unlikely(idx >= 0)) {
532:
533: flag = dsp_opcode_type(prev_pc, pc);
534: if (flag == CALL_SUBROUTINE) {
535: dsp_callinfo.return_pc = DSP_GetNextPC(prev_pc); /* slow! */
536: } else if (caller_pc != PC_UNDEFINED) {
537: /* returned from function, change return
538: * instruction address to address of
539: * what did the returned call.
540: */
541: prev_pc = caller_pc;
542: assert(is_prev_instr(prev_pc, pc));
543: flag = CALL_NEXT;
544: }
545: Profile_CallStart(idx, &dsp_callinfo, prev_pc, flag, pc, counters);
546:
547: }
548: }
549:
550: /**
1.1.1.2 root 551: * log last loop info, if there's suitable data for one
552: */
553: static void log_last_loop(void)
554: {
555: unsigned len = dsp_profile.loop_end - dsp_profile.loop_start;
556: if (dsp_profile.loop_count > 1 && (len < profile_loop.dsp_limit || !profile_loop.dsp_limit)) {
557: fprintf(profile_loop.fp, "DSP %d 0x%04x %d %d\n", nVBLs,
558: dsp_profile.loop_start, len, dsp_profile.loop_count);
559: fflush(profile_loop.fp);
560: }
561: }
562:
563: /**
1.1 root 564: * Update DSP cycle and count statistics for PC address.
565: *
566: * This is called after instruction is executed and PC points
567: * to next instruction i.e. info is for previous PC address.
568: */
569: void Profile_DspUpdate(void)
570: {
571: dsp_profile_item_t *prev;
572: Uint16 pc, prev_pc, cycles;
573: counters_t *counters;
574:
575: prev_pc = dsp_profile.prev_pc;
576: dsp_profile.prev_pc = pc = DSP_GetPC();
1.1.1.2 root 577:
578: if (unlikely(profile_loop.fp)) {
579: if (pc < prev_pc) {
580: if (pc == dsp_profile.loop_start && prev_pc == dsp_profile.loop_end) {
581: dsp_profile.loop_count++;
582: } else {
583: dsp_profile.loop_start = pc;
584: dsp_profile.loop_end = prev_pc;
585: dsp_profile.loop_count = 1;
586: }
587: } else {
588: if (pc > dsp_profile.loop_end) {
589: log_last_loop();
590: dsp_profile.loop_end = 0xFFFF;
591: dsp_profile.loop_count = 0;
592: }
593: }
594: }
595:
1.1 root 596: prev = dsp_profile.data + prev_pc;
597:
598: if (likely(prev->count < MAX_DSP_PROFILE_VALUE)) {
599: prev->count++;
600: }
601:
602: cycles = DSP_GetInstrCycles();
603: if (likely(prev->cycles < MAX_DSP_PROFILE_VALUE - cycles)) {
604: prev->cycles += cycles;
605: } else {
606: prev->cycles = MAX_DSP_PROFILE_VALUE;
607: }
608:
609: if (unlikely(cycles < prev->min_cycle)) {
610: prev->min_cycle = cycles;
611: }
612: if (unlikely(cycles > prev->max_cycle)) {
613: prev->max_cycle = cycles;
614: }
615:
616: counters = &(dsp_profile.ram.counters);
617: if (dsp_callinfo.sites) {
618: collect_calls(prev_pc, counters);
619: }
620: /* counters are increased after caller info is processed,
621: * otherwise cost for the instruction calling the callee
622: * doesn't get accounted to caller (but callee).
623: */
624: counters->cycles += cycles;
625: counters->count++;
626: }
627:
628: /**
629: * Helper for collecting DSP profile area statistics.
630: */
631: static void update_area_item(profile_area_t *area, Uint16 addr, dsp_profile_item_t *item)
632: {
633: Uint64 cycles = item->cycles;
634: Uint64 count = item->count;
635: Uint16 diff;
636:
637: if (!count) {
638: return;
639: }
640: if (cycles == MAX_DSP_PROFILE_VALUE) {
641: area->overflow = true;
642: }
643: if (item->max_cycle) {
644: diff = item->max_cycle - item->min_cycle;
645: } else {
646: diff = 0;
647: }
648:
649: area->counters.count += count;
650: area->counters.cycles += cycles;
1.1.1.3 root 651: area->counters.cycles_diffs += diff;
1.1 root 652:
653: if (addr < area->lowest) {
654: area->lowest = addr;
655: }
656: area->highest = addr;
657:
658: area->active++;
659: }
660:
661: /**
662: * Stop and process the DSP profiling data; collect stats and
663: * prepare for more optimal sorting.
664: */
665: void Profile_DspStop(void)
666: {
667: dsp_profile_item_t *item;
668: profile_area_t *area;
669: Uint16 *sort_arr;
670: Uint32 addr;
671:
672: if (dsp_profile.processed || !dsp_profile.enabled) {
673: return;
674: }
675:
1.1.1.2 root 676: log_last_loop();
677: if (profile_loop.fp) {
678: fflush(profile_loop.fp);
679: }
680:
1.1 root 681: Profile_FinalizeCalls(&(dsp_callinfo), &(dsp_profile.ram.counters), Symbols_GetByDspAddress);
682:
683: /* find lowest and highest addresses executed */
684: area = &dsp_profile.ram;
685: memset(area, 0, sizeof(profile_area_t));
686: area->lowest = DSP_PROFILE_ARR_SIZE;
687:
688: item = dsp_profile.data;
689: for (addr = 0; addr < DSP_PROFILE_ARR_SIZE; addr++, item++) {
690: update_area_item(area, addr, item);
691: }
692:
693: /* allocate address array for sorting */
694: sort_arr = calloc(dsp_profile.ram.active, sizeof(*sort_arr));
695:
696: if (!sort_arr) {
697: perror("ERROR: allocating DSP profile address data");
698: free(dsp_profile.data);
699: dsp_profile.data = NULL;
700: return;
701: }
702: printf("Allocated DSP profile address buffer (%d KB).\n",
703: (int)sizeof(*sort_arr)*(dsp_profile.ram.active+512)/1024);
704: dsp_profile.sort_arr = sort_arr;
705:
706: /* ...and fill addresses for used instructions... */
707: area = &dsp_profile.ram;
708: item = &(dsp_profile.data[area->lowest]);
709: for (addr = area->lowest; addr <= area->highest; addr++, item++) {
710: if (item->count) {
711: *sort_arr++ = addr;
712: }
713: }
714: //printf("%d/%d/%d\n", area->active, sort_arr-dsp_profile.sort_arr, active);
715:
716: Profile_DspShowStats();
717: dsp_profile.processed = true;
718: }
719:
720: /**
721: * Get pointers to DSP profile enabling and disasm address variables
722: * for updating them (in parser).
723: */
724: void Profile_DspGetPointers(bool **enabled, Uint32 **disasm_addr)
725: {
726: *disasm_addr = &dsp_profile.disasm_addr;
727: *enabled = &dsp_profile.enabled;
728: }
729:
730: /**
731: * Get callinfo & symbol search pointers for stack walking.
732: */
1.1.1.4 root 733: void Profile_DspGetCallinfo(callinfo_t **callinfo, const char* (**get_symbol)(Uint32, symtype_t))
1.1 root 734: {
735: *callinfo = &(dsp_callinfo);
736: *get_symbol = Symbols_GetByDspAddress;
737: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.