|
|
1.1 root 1: /*
2: * Hatari - profiledsp.c
3: *
1.1.1.3 ! root 4: * Copyright (C) 2010-2015 by Eero Tamminen
1.1 root 5: *
6: * This file is distributed under the GNU General Public License, version 2
7: * or at your option any later version. Read the file gpl.txt for details.
8: *
9: * profiledsp.c - functions for profiling DSP and showing the results.
10: */
11: const char Profiledsp_fileid[] = "Hatari profiledsp.c : " __DATE__ " " __TIME__;
12:
13: #include <stdio.h>
14: #include <inttypes.h>
15: #include <assert.h>
16: #include "main.h"
17: #include "configuration.h"
18: #include "clocks_timings.h"
19: #include "dsp.h"
20: #include "profile.h"
21: #include "profile_priv.h"
22: #include "symbols.h"
1.1.1.2 root 23: /* for VBL info */
24: #include "screen.h"
25: #include "video.h"
1.1 root 26:
27: static callinfo_t dsp_callinfo;
28:
29: #define DSP_PROFILE_ARR_SIZE 0x10000
30: #define MAX_DSP_PROFILE_VALUE 0xFFFFFFFFFFFFFFFFLL
31:
32: typedef struct {
33: Uint64 count; /* how many times this address is used */
34: Uint64 cycles; /* how many DSP cycles was taken at this address */
35: Uint16 min_cycle;
36: Uint16 max_cycle;
37: } dsp_profile_item_t;
38:
39: static struct {
40: dsp_profile_item_t *data; /* profile data */
41: profile_area_t ram; /* statistics for whole memory */
42: Uint16 *sort_arr; /* data indexes used for sorting */
43: Uint16 prev_pc; /* previous PC for which the cycles are for */
1.1.1.2 root 44: Uint16 loop_start; /* address of last loop start */
45: Uint16 loop_end; /* address of last loop end */
46: Uint32 loop_count; /* how many times it was looped */
1.1 root 47: Uint32 disasm_addr; /* 'dspaddresses' command start address */
48: bool processed; /* true when data is already processed */
49: bool enabled; /* true when profiling enabled */
50: } dsp_profile;
51:
52:
53: /* ------------------ DSP profile results ----------------- */
54:
55: /**
56: * Get DSP cycles, count and count percentage for given address.
57: * Return true if data was available and non-zero, false otherwise.
58: */
59: bool Profile_DspAddressData(Uint16 addr, float *percentage, Uint64 *count, Uint64 *cycles, Uint16 *cycle_diff)
60: {
61: dsp_profile_item_t *item;
62: if (!dsp_profile.data) {
63: return false;
64: }
65: item = dsp_profile.data + addr;
66:
67: *cycles = item->cycles;
68: *count = item->count;
69: if (item->max_cycle) {
70: *cycle_diff = item->max_cycle - item->min_cycle;
71: } else {
72: *cycle_diff = 0;
73: }
74: if (dsp_profile.ram.counters.count) {
75: *percentage = 100.0*(*count)/dsp_profile.ram.counters.count;
76: } else {
77: *percentage = 0.0;
78: }
79: return (*count > 0);
80: }
81:
82: /**
83: * show DSP specific profile statistics.
84: */
85: void Profile_DspShowStats(void)
86: {
87: profile_area_t *area = &dsp_profile.ram;
88: fprintf(stderr, "DSP profile statistics (0x0-0xFFFF):\n");
89: if (!area->active) {
90: fprintf(stderr, "- no activity\n");
91: return;
92: }
93: fprintf(stderr, "- active address range:\n 0x%04x-0x%04x\n",
94: area->lowest, area->highest);
95: fprintf(stderr, "- active instruction addresses:\n %d\n",
96: area->active);
97: fprintf(stderr, "- executed instructions:\n %"PRIu64"\n",
98: area->counters.count);
99: /* indicates either instruction(s) that address different memory areas
100: * (they can have different access costs), or more significantly,
101: * DSP code that has changed during profiling.
102: */
103: fprintf(stderr, "- sum of per instruction cycle changes\n"
104: " (can indicate code change during profiling):\n %"PRIu64"\n",
1.1.1.3 ! root 105: area->counters.cycles_diffs);
1.1 root 106:
107: fprintf(stderr, "- used cycles:\n %"PRIu64"\n",
108: area->counters.cycles);
109: if (area->overflow) {
110: fprintf(stderr, " *** COUNTERS OVERFLOW! ***\n");
111: }
112: fprintf(stderr, "\n= %.5fs\n", (double)(area->counters.cycles) / MachineClocks.DSP_Freq);
113: }
114:
115: /**
116: * Show DSP instructions which execution was profiled, in the address order,
117: * starting from the given address. Return next disassembly address.
118: */
119: Uint16 Profile_DspShowAddresses(Uint32 addr, Uint32 upper, FILE *out)
120: {
121: int show, shown, active;
122: dsp_profile_item_t *data;
123: Uint16 nextpc;
124: Uint32 end;
125: const char *symbol;
126:
127: data = dsp_profile.data;
128: if (!data) {
129: fprintf(stderr, "ERROR: no DSP profiling data available!\n");
130: return 0;
131: }
132:
133: end = DSP_PROFILE_ARR_SIZE;
134: active = dsp_profile.ram.active;
135: if (upper) {
136: if (upper < end) {
137: end = upper;
138: }
139: show = active;
140: } else {
141: show = ConfigureParams.Debugger.nDisasmLines;
142: if (!show || show > active) {
143: show = active;
144: }
145: }
146:
147: fputs("# disassembly with profile data: <instructions percentage>% (<sum of instructions>, <sum of cycles>, <max cycle difference>)\n", out);
148:
149: nextpc = 0;
150: for (shown = 0; shown < show && addr < end; addr++) {
151: if (!data[addr].count) {
152: continue;
153: }
154: if (addr != nextpc && nextpc) {
155: fputs("[...]\n", out);
156: }
157: symbol = Symbols_GetByDspAddress(addr);
158: if (symbol) {
159: fprintf(out, "%s:\n", symbol);
160: }
161: nextpc = DSP_DisasmAddress(out, addr, addr);
162: shown++;
163: }
164: printf("Disassembled %d (of active %d) DSP addresses.\n", shown, active);
165: return nextpc;
166: }
167:
168: /**
169: * compare function for qsort() to sort DSP profile data by descdending
170: * address cycles counts.
171: */
172: static int cmp_dsp_cycles(const void *p1, const void *p2)
173: {
174: Uint64 count1 = dsp_profile.data[*(const Uint16*)p1].cycles;
175: Uint64 count2 = dsp_profile.data[*(const Uint16*)p2].cycles;
176: if (count1 > count2) {
177: return -1;
178: }
179: if (count1 < count2) {
180: return 1;
181: }
182: return 0;
183: }
184:
185: /**
186: * Sort DSP profile data addresses by cycle counts and show the results.
187: */
188: void Profile_DspShowCycles(int show)
189: {
190: int active;
191: Uint16 *sort_arr, *end, addr;
192: dsp_profile_item_t *data = dsp_profile.data;
193: float percentage;
194: Uint64 count;
195:
196: if (!data) {
197: fprintf(stderr, "ERROR: no DSP profiling data available!\n");
198: return;
199: }
200:
201: active = dsp_profile.ram.active;
202: sort_arr = dsp_profile.sort_arr;
203: qsort(sort_arr, active, sizeof(*sort_arr), cmp_dsp_cycles);
204:
205: printf("addr:\tcycles:\n");
206: show = (show < active ? show : active);
207: for (end = sort_arr + show; sort_arr < end; sort_arr++) {
208: addr = *sort_arr;
209: count = data[addr].cycles;
210: percentage = 100.0*count/dsp_profile.ram.counters.cycles;
211: printf("0x%04x\t%5.2f%%\t%"PRIu64"%s\n", addr, percentage, count,
212: count == MAX_DSP_PROFILE_VALUE ? " (OVERFLOW)" : "");
213: }
214: printf("%d DSP addresses listed.\n", show);
215: }
216:
217:
218: /**
219: * compare function for qsort() to sort DSP profile data by descdending
220: * address access counts.
221: */
222: static int cmp_dsp_count(const void *p1, const void *p2)
223: {
224: Uint64 count1 = dsp_profile.data[*(const Uint16*)p1].count;
225: Uint64 count2 = dsp_profile.data[*(const Uint16*)p2].count;
226: if (count1 > count2) {
227: return -1;
228: }
229: if (count1 < count2) {
230: return 1;
231: }
232: return 0;
233: }
234:
235: /**
236: * Sort DSP profile data addresses by call counts and show the results.
237: * If symbols are requested and symbols are loaded, show (only) addresses
238: * matching a symbol.
239: */
240: void Profile_DspShowCounts(int show, bool only_symbols)
241: {
242: dsp_profile_item_t *data = dsp_profile.data;
243: int symbols, matched, active;
244: Uint16 *sort_arr, *end, addr;
245: const char *name;
246: float percentage;
247: Uint64 count;
248:
249: if (!data) {
250: fprintf(stderr, "ERROR: no DSP profiling data available!\n");
251: return;
252: }
253: active = dsp_profile.ram.active;
254: show = (show < active ? show : active);
255:
256: sort_arr = dsp_profile.sort_arr;
257: qsort(sort_arr, active, sizeof(*sort_arr), cmp_dsp_count);
258:
259: if (!only_symbols) {
260: printf("addr:\tcount:\n");
261: for (end = sort_arr + show; sort_arr < end; sort_arr++) {
262: addr = *sort_arr;
263: count = data[addr].count;
264: percentage = 100.0*count/dsp_profile.ram.counters.count;
265: printf("0x%04x\t%5.2f%%\t%"PRIu64"%s\n",
266: addr, percentage, count,
267: count == MAX_DSP_PROFILE_VALUE ? " (OVERFLOW)" : "");
268: }
269: printf("%d DSP addresses listed.\n", show);
270: return;
271: }
272:
273: symbols = Symbols_DspCount();
274: if (!symbols) {
275: fprintf(stderr, "ERROR: no DSP symbols loaded!\n");
276: return;
277: }
278: matched = 0;
279:
280: printf("addr:\tcount:\t\tsymbol:\n");
281: for (end = sort_arr + active; sort_arr < end; sort_arr++) {
282:
283: addr = *sort_arr;
284: name = Symbols_GetByDspAddress(addr);
285: if (!name) {
286: continue;
287: }
288: count = data[addr].count;
289: percentage = 100.0*count/dsp_profile.ram.counters.count;
290: printf("0x%04x\t%.2f%%\t%"PRIu64"\t%s%s\n",
291: addr, percentage, count, name,
292: count == MAX_DSP_PROFILE_VALUE ? " (OVERFLOW)" : "");
293:
294: matched++;
295: if (matched >= show || matched >= symbols) {
296: break;
297: }
298: }
299: printf("%d DSP symbols listed.\n", matched);
300: }
301:
302:
303: static const char * addr2name(Uint32 addr, Uint64 *total)
304: {
305: *total = dsp_profile.data[addr].count;
306: return Symbols_GetByDspAddress(addr);
307: }
308:
309: /**
310: * Output DSP callers info to given file.
311: */
312: void Profile_DspShowCallers(FILE *fp)
313: {
314: Profile_ShowCallers(fp, dsp_callinfo.sites, dsp_callinfo.site, addr2name);
315: }
316:
317: /**
318: * Save DSP profile information to given file.
319: */
320: void Profile_DspSave(FILE *out)
321: {
322: /* Comma separated descriptions for the profile disassembly data fields.
323: * Instructions and cycles need to be first two fields!
324: */
325: fputs("Field names:\tExecuted instructions, Used cycles, Largest cycle differences (= code changes during profiling)\n", out);
326: /* (Python) pegexp that matches address and all describled fields from disassembly:
327: * <space>:<address> <opcodes> (<instr cycles>) <instr> <count>% (<count>, <cycles>)
328: * p:0202 0aa980 000200 (07 cyc) jclr #0,x:$ffe9,p:$0200 0.00% (6, 42)
329: */
330: fputs("Field regexp:\t^p:([0-9a-f]+) .*% \\((.*)\\)$\n", out);
331: Profile_DspShowAddresses(0, DSP_PROFILE_ARR_SIZE, out);
332: Profile_DspShowCallers(out);
333: }
334:
335: /* ------------------ DSP profile control ----------------- */
336:
337: /**
338: * Initialize DSP profiling when necessary. Return true if profiling.
339: */
340: bool Profile_DspStart(void)
341: {
342: dsp_profile_item_t *item;
343: int i;
344:
345: Profile_FreeCallinfo(&(dsp_callinfo));
346: if (dsp_profile.sort_arr) {
347: /* remove previous results */
348: free(dsp_profile.sort_arr);
349: free(dsp_profile.data);
350: dsp_profile.sort_arr = NULL;
351: dsp_profile.data = NULL;
352: printf("Freed previous DSP profile buffers.\n");
353: }
354: if (!dsp_profile.enabled) {
355: return false;
356: }
357: /* zero everything */
358: memset(&dsp_profile, 0, sizeof(dsp_profile));
359:
360: dsp_profile.data = calloc(DSP_PROFILE_ARR_SIZE, sizeof(*dsp_profile.data));
361: if (!dsp_profile.data) {
362: perror("ERROR, new DSP profile buffer alloc failed");
363: return false;
364: }
365: printf("Allocated DSP profile buffer (%d KB).\n",
366: (int)sizeof(*dsp_profile.data)*DSP_PROFILE_ARR_SIZE/1024);
367:
368: Profile_AllocCallinfo(&(dsp_callinfo), Symbols_DspCount(), "DSP");
369:
370: item = dsp_profile.data;
371: for (i = 0; i < DSP_PROFILE_ARR_SIZE; i++, item++) {
372: item->min_cycle = 0xFFFF;
373: }
374: dsp_profile.prev_pc = DSP_GetPC();
375:
1.1.1.2 root 376: dsp_profile.loop_start = 0xFFFF;
377: dsp_profile.loop_end = 0xFFFF;
378: dsp_profile.loop_count = 0;
379: Profile_LoopReset();
380:
1.1 root 381: dsp_profile.disasm_addr = 0;
382: dsp_profile.processed = false;
383: dsp_profile.enabled = true;
384: return dsp_profile.enabled;
385: }
386:
387: /* return true if pc is next instruction for previous pc */
388: static bool is_prev_instr(Uint16 prev_pc, Uint16 pc)
389: {
390: /* just moved to next instruction (1-2 words)? */
391: if (prev_pc < pc && (pc - prev_pc) <= 4) {
392: return true;
393: }
394: return false;
395: }
396:
397: /* return branch type based on caller instruction type */
398: static calltype_t dsp_opcode_type(Uint16 prev_pc, Uint16 pc)
399: {
400: const char *dummy;
401: Uint32 opcode;
402:
403: /* 24-bit instruction opcode */
404: opcode = DSP_ReadMemory(prev_pc, 'P', &dummy) & 0xFFFFFF;
405:
406: /* subroutine returns */
407: if (opcode == 0xC) { /* (just) RTS */
408: return CALL_SUBRETURN;
409: }
410: /* unconditional subroutine calls */
411: if ((opcode & 0xFFF000) == 0xD0000 || /* JSR 00001101 0000aaaa aaaaaaaa */
412: (opcode & 0xFFC0FF) == 0xBC080) { /* JSR 00001011 11MMMRRR 10000000 */
413: return CALL_SUBROUTINE;
414: }
415: /* conditional subroutine calls */
416: if ((opcode & 0xFF0000) == 0xF0000 || /* JSCC 00001111 CCCCaaaa aaaaaaaa */
417: (opcode & 0xFFC0F0) == 0xBC0A0 || /* JSCC 00001011 11MMMRRR 1010CCCC */
418: (opcode & 0xFFC0A0) == 0xB4080 || /* JSCLR 00001011 01MMMRRR 1S0bbbbb */
419: (opcode & 0xFFC0A0) == 0xB0080 || /* JSCLR 00001011 00aaaaaa 1S0bbbbb */
420: (opcode & 0xFFC0A0) == 0xB8080 || /* JSCLR 00001011 10pppppp 1S0bbbbb */
421: (opcode & 0xFFC0E0) == 0xBC000 || /* JSCLR 00001011 11DDDDDD 000bbbbb */
422: (opcode & 0xFFC0A0) == 0xB40A0 || /* JSSET 00001011 01MMMRRR 1S1bbbbb */
423: (opcode & 0xFFC0A0) == 0xB00A0 || /* JSSET 00001011 00aaaaaa 1S1bbbbb */
424: (opcode & 0xFFC0A0) == 0xB80A0 || /* JSSET 00001011 10pppppp 1S1bbbbb */
425: (opcode & 0xFFC0E0) == 0xBC020) { /* JSSET 00001011 11DDDDDD 001bbbbb */
426: /* hopefully fairly safe heuristic:
427: * if previously executed instruction
428: * was one before current one, no
429: * subroutine call was made to next
430: * instruction, the condition just
431: * wasn't met.
432: */
433: if (is_prev_instr(prev_pc, pc)) {
434: return CALL_NEXT;
435: }
436: return CALL_SUBROUTINE;
437: }
438: /* exception handler returns */
439: if (opcode == 0x4) { /* (just) RTI */
440: return CALL_EXCRETURN;
441: }
442:
443: /* Besides CALL_UNKNOWN, rest isn't used by subroutine call
444: * cost collection. However, it's useful info when debugging
445: * code or reading full callgraphs (because optimized code uses
446: * also jumps/branches for subroutine calls).
447: */
448:
449: /* TODO: exception invocation.
450: * Could be detected by PC going through low interrupt vector adresses,
451: * but fast-calls using JSR/RTS would need separate handling.
452: */
453: if (0) { /* TODO */
454: return CALL_EXCEPTION;
455: }
456: /* branches */
457: if ((opcode & 0xFFF000) == 0xC0000 || /* JMP 00001100 0000aaaa aaaaaaaa */
458: (opcode & 0xFFC0FF) == 0xAC080 || /* JMP 00001010 11MMMRRR 10000000 */
459: (opcode & 0xFF0000) == 0xE0000 || /* JCC 00001110 CCCCaaaa aaaaaaaa */
460: (opcode & 0xFFC0F0) == 0xAC0A0 || /* JCC 00001010 11MMMRRR 1010CCCC */
461: (opcode & 0xFFC0A0) == 0xA8080 || /* JCLR 00001010 10pppppp 1S0bbbbb */
462: (opcode & 0xFFC0A0) == 0xA4080 || /* JCLR 00001010 01MMMRRR 1S0bbbbb */
463: (opcode & 0xFFC0A0) == 0xA0080 || /* JCLR 00001010 00aaaaaa 1S0bbbbb */
464: (opcode & 0xFFC0E0) == 0xAC000 || /* JCLR 00001010 11dddddd 000bbbbb */
465: (opcode & 0xFFC0A0) == 0xA80A0 || /* JSET 00001010 10pppppp 1S1bbbbb */
466: (opcode & 0xFFC0A0) == 0xA40A0 || /* JSET 00001010 01MMMRRR 1S1bbbbb */
467: (opcode & 0xFFC0A0) == 0xA00A0 || /* JSET 00001010 00aaaaaa 1S1bbbbb */
468: (opcode & 0xFFC0E0) == 0xAC020 || /* JSET 00001010 11dddddd 001bbbbb */
469: (opcode & 0xFF00F0) == 0x600A0 || /* REP 00000110 iiiiiiii 1010hhhh */
470: (opcode & 0xFFC0FF) == 0x6C020 || /* REP 00000110 11dddddd 00100000 */
471: (opcode & 0xFFC0BF) == 0x64020 || /* REP 00000110 01MMMRRR 0s100000 */
472: (opcode & 0xFFC0BF) == 0x60020 || /* REP 00000110 00aaaaaa 0s100000 */
473: (opcode & 0xFF00F0) == 0x60080 || /* DO/ENDO 00000110 iiiiiiii 1000hhhh */
474: (opcode & 0xFFC0FF) == 0x6C000 || /* DO/ENDO 00000110 11DDDDDD 00000000 */
475: (opcode & 0xFFC0BF) == 0x64000 || /* DO/ENDO 00000110 01MMMRRR 0S000000 */
476: (opcode & 0xFFC0BF) == 0x60000) { /* DO/ENDO 00000110 00aaaaaa 0S000000 */
477: return CALL_BRANCH;
478: }
479: if (is_prev_instr(prev_pc, pc)) {
480: return CALL_NEXT;
481: }
482: return CALL_UNKNOWN;
483: }
484:
485: /**
486: * If call tracking is enabled (there are symbols), collect
487: * information about subroutine and other calls, and their costs.
488: *
489: * Like with profile data, caller info checks need to be for previous
490: * instruction, that's why "pc" argument for this function actually
491: * needs to be previous PC.
492: */
493: static void collect_calls(Uint16 pc, counters_t *counters)
494: {
495: calltype_t flag;
496: Uint16 prev_pc;
497: Uint32 caller_pc;
498: int idx;
499:
500: prev_pc = dsp_callinfo.prev_pc;
501: dsp_callinfo.prev_pc = pc;
502: caller_pc = PC_UNDEFINED;
503:
504: /* address is return address for last subroutine call? */
505: if (unlikely(pc == dsp_callinfo.return_pc) && likely(dsp_callinfo.depth)) {
506:
507: flag = dsp_opcode_type(prev_pc, pc);
508: /* return address is entered either by subroutine return,
509: * or by returning from exception that interrupted
510: * the instruction at return address.
511: */
512: if (likely(flag == CALL_SUBRETURN || flag == CALL_EXCRETURN)) {
513: caller_pc = Profile_CallEnd(&dsp_callinfo, counters);
514: }
515: }
516:
517: /* address is one which we're tracking? */
518: idx = Symbols_GetDspAddressIndex(pc);
519: if (unlikely(idx >= 0)) {
520:
521: flag = dsp_opcode_type(prev_pc, pc);
522: if (flag == CALL_SUBROUTINE) {
523: dsp_callinfo.return_pc = DSP_GetNextPC(prev_pc); /* slow! */
524: } else if (caller_pc != PC_UNDEFINED) {
525: /* returned from function, change return
526: * instruction address to address of
527: * what did the returned call.
528: */
529: prev_pc = caller_pc;
530: assert(is_prev_instr(prev_pc, pc));
531: flag = CALL_NEXT;
532: }
533: Profile_CallStart(idx, &dsp_callinfo, prev_pc, flag, pc, counters);
534:
535: }
536: }
537:
538: /**
1.1.1.2 root 539: * log last loop info, if there's suitable data for one
540: */
541: static void log_last_loop(void)
542: {
543: unsigned len = dsp_profile.loop_end - dsp_profile.loop_start;
544: if (dsp_profile.loop_count > 1 && (len < profile_loop.dsp_limit || !profile_loop.dsp_limit)) {
545: fprintf(profile_loop.fp, "DSP %d 0x%04x %d %d\n", nVBLs,
546: dsp_profile.loop_start, len, dsp_profile.loop_count);
547: fflush(profile_loop.fp);
548: }
549: }
550:
551: /**
1.1 root 552: * Update DSP cycle and count statistics for PC address.
553: *
554: * This is called after instruction is executed and PC points
555: * to next instruction i.e. info is for previous PC address.
556: */
557: void Profile_DspUpdate(void)
558: {
559: dsp_profile_item_t *prev;
560: Uint16 pc, prev_pc, cycles;
561: counters_t *counters;
562:
563: prev_pc = dsp_profile.prev_pc;
564: dsp_profile.prev_pc = pc = DSP_GetPC();
1.1.1.2 root 565:
566: if (unlikely(profile_loop.fp)) {
567: if (pc < prev_pc) {
568: if (pc == dsp_profile.loop_start && prev_pc == dsp_profile.loop_end) {
569: dsp_profile.loop_count++;
570: } else {
571: dsp_profile.loop_start = pc;
572: dsp_profile.loop_end = prev_pc;
573: dsp_profile.loop_count = 1;
574: }
575: } else {
576: if (pc > dsp_profile.loop_end) {
577: log_last_loop();
578: dsp_profile.loop_end = 0xFFFF;
579: dsp_profile.loop_count = 0;
580: }
581: }
582: }
583:
1.1 root 584: prev = dsp_profile.data + prev_pc;
585:
586: if (likely(prev->count < MAX_DSP_PROFILE_VALUE)) {
587: prev->count++;
588: }
589:
590: cycles = DSP_GetInstrCycles();
591: if (likely(prev->cycles < MAX_DSP_PROFILE_VALUE - cycles)) {
592: prev->cycles += cycles;
593: } else {
594: prev->cycles = MAX_DSP_PROFILE_VALUE;
595: }
596:
597: if (unlikely(cycles < prev->min_cycle)) {
598: prev->min_cycle = cycles;
599: }
600: if (unlikely(cycles > prev->max_cycle)) {
601: prev->max_cycle = cycles;
602: }
603:
604: counters = &(dsp_profile.ram.counters);
605: if (dsp_callinfo.sites) {
606: collect_calls(prev_pc, counters);
607: }
608: /* counters are increased after caller info is processed,
609: * otherwise cost for the instruction calling the callee
610: * doesn't get accounted to caller (but callee).
611: */
612: counters->cycles += cycles;
613: counters->count++;
614: }
615:
616: /**
617: * Helper for collecting DSP profile area statistics.
618: */
619: static void update_area_item(profile_area_t *area, Uint16 addr, dsp_profile_item_t *item)
620: {
621: Uint64 cycles = item->cycles;
622: Uint64 count = item->count;
623: Uint16 diff;
624:
625: if (!count) {
626: return;
627: }
628: if (cycles == MAX_DSP_PROFILE_VALUE) {
629: area->overflow = true;
630: }
631: if (item->max_cycle) {
632: diff = item->max_cycle - item->min_cycle;
633: } else {
634: diff = 0;
635: }
636:
637: area->counters.count += count;
638: area->counters.cycles += cycles;
1.1.1.3 ! root 639: area->counters.cycles_diffs += diff;
1.1 root 640:
641: if (addr < area->lowest) {
642: area->lowest = addr;
643: }
644: area->highest = addr;
645:
646: area->active++;
647: }
648:
649: /**
650: * Stop and process the DSP profiling data; collect stats and
651: * prepare for more optimal sorting.
652: */
653: void Profile_DspStop(void)
654: {
655: dsp_profile_item_t *item;
656: profile_area_t *area;
657: Uint16 *sort_arr;
658: Uint32 addr;
659:
660: if (dsp_profile.processed || !dsp_profile.enabled) {
661: return;
662: }
663:
1.1.1.2 root 664: log_last_loop();
665: if (profile_loop.fp) {
666: fflush(profile_loop.fp);
667: }
668:
1.1 root 669: Profile_FinalizeCalls(&(dsp_callinfo), &(dsp_profile.ram.counters), Symbols_GetByDspAddress);
670:
671: /* find lowest and highest addresses executed */
672: area = &dsp_profile.ram;
673: memset(area, 0, sizeof(profile_area_t));
674: area->lowest = DSP_PROFILE_ARR_SIZE;
675:
676: item = dsp_profile.data;
677: for (addr = 0; addr < DSP_PROFILE_ARR_SIZE; addr++, item++) {
678: update_area_item(area, addr, item);
679: }
680:
681: /* allocate address array for sorting */
682: sort_arr = calloc(dsp_profile.ram.active, sizeof(*sort_arr));
683:
684: if (!sort_arr) {
685: perror("ERROR: allocating DSP profile address data");
686: free(dsp_profile.data);
687: dsp_profile.data = NULL;
688: return;
689: }
690: printf("Allocated DSP profile address buffer (%d KB).\n",
691: (int)sizeof(*sort_arr)*(dsp_profile.ram.active+512)/1024);
692: dsp_profile.sort_arr = sort_arr;
693:
694: /* ...and fill addresses for used instructions... */
695: area = &dsp_profile.ram;
696: item = &(dsp_profile.data[area->lowest]);
697: for (addr = area->lowest; addr <= area->highest; addr++, item++) {
698: if (item->count) {
699: *sort_arr++ = addr;
700: }
701: }
702: //printf("%d/%d/%d\n", area->active, sort_arr-dsp_profile.sort_arr, active);
703:
704: Profile_DspShowStats();
705: dsp_profile.processed = true;
706: }
707:
708: /**
709: * Get pointers to DSP profile enabling and disasm address variables
710: * for updating them (in parser).
711: */
712: void Profile_DspGetPointers(bool **enabled, Uint32 **disasm_addr)
713: {
714: *disasm_addr = &dsp_profile.disasm_addr;
715: *enabled = &dsp_profile.enabled;
716: }
717:
718: /**
719: * Get callinfo & symbol search pointers for stack walking.
720: */
721: void Profile_DspGetCallinfo(callinfo_t **callinfo, const char* (**get_symbol)(Uint32))
722: {
723: *callinfo = &(dsp_callinfo);
724: *get_symbol = Symbols_GetByDspAddress;
725: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.