Annotation of hatari/src/debug/profile.c, revision 1.1.1.1

1.1       root        1: /*
                      2:  * Hatari - profile.c
                      3:  * 
                      4:  * Copyright (C) 2010 by Eero Tamminen
                      5:  *
                      6:  * This file is distributed under the GNU Public License, version 2 or at
                      7:  * your option any later version. Read the file gpl.txt for details.
                      8:  *
                      9:  * profile.c - functions for profiling CPU and DSP and showing the results.
                     10:  */
/* Identification string for this source file; embeds the build date and time. */
const char Profile_fileid[] = "Hatari profile.c : " __DATE__ " " __TIME__;
                     12: 
                     13: #include <stdio.h>
                     14: #include "main.h"
                     15: #include "debug_priv.h"
                     16: #include "dsp.h"
                     17: #include "m68000.h"
                     18: #include "profile.h"
                     19: #include "stMemory.h"
                     20: #include "symbols.h"
                     21: #include "tos.h"
                     22: 
/* Largest value a 32-bit profile counter can hold.  The result listings
 * treat a counter equal to this value as overflowed.
 */
#define MAX_PROFILE_VALUE 0xFFFFFFFF
                     24: 
/* Profile counters collected for a single instruction address. */
typedef struct {
	Uint32 count;   /* how many times this address is used */
	Uint32 cycles;  /* how many cycles were spent at this address */
} profile_item_t;
                     29: 
/* Summary statistics for one memory area, filled in when profiling stops. */
typedef struct {
	unsigned long long all_cycles, all_count; /* sums of cycles and hits over the area */
	Uint32 max_cycles, max_cycles_addr;       /* largest cycles value and where it was seen */
	Uint32 max_count, max_count_addr;         /* largest hit count and where it was seen */
	Uint32 lowest, highest; /* active address range within memory area */
	Uint32 active;          /* number of active addresses */
} profile_area_t;
                     37: 
/* State and results of CPU profiling. */
static struct {
	unsigned long long all_cycles, all_count; /* totals over the RAM, ROM and TOS areas */
	Uint32 size;          /* number of allocated profile data items
	                       * (one extra item is allocated for invalid PC values) */
	profile_item_t *data; /* profile data items */
	profile_area_t ram;   /* normal RAM stats */
	profile_area_t rom;   /* cartridge ROM stats */
	profile_area_t tos;   /* ROM TOS stats */
	Uint32 active;        /* number of active data items in all areas */
	Uint32 *sort_arr;     /* data indexes used for sorting */
	bool enabled;         /* true when profiling enabled */
} cpu_profile;
                     49: 
                     50: 
/* NOTE(review): presumably the number of DSP profile data items, one per
 * 16-bit DSP address -- confirm against the DSP allocation code.
 */
#define DSP_PROFILE_ARR_SIZE 0x10000
                     52: 
/* State and results of DSP profiling. */
static struct {
	profile_item_t *data; /* profile data, indexed directly by DSP address */
	profile_area_t ram;   /* normal RAM stats */
	Uint16 *sort_arr;     /* data indexes used for sorting */
	bool enabled;         /* true when profiling enabled */
} dsp_profile;
                     59: 
                     60: 
                     61: /* ------------------ CPU profile results ----------------- */
                     62: 
                     63: /**
                     64:  * convert Atari memory address to sorting array profile data index.
                     65:  */
                     66: static inline Uint32 address2index(Uint32 pc)
                     67: {
                     68:        if (unlikely(pc & 1)) {
                     69:                fprintf(stderr, "WARNING: odd CPU profile instruction address 0x%x!\n", pc);
                     70:        }
                     71:        if (pc >= TosAddress && pc < TosAddress + TosSize) {
                     72:                /* TOS, put it after RAM & ROM data */
                     73:                pc = pc - TosAddress + STRamEnd + 0x20000;
                     74:        
                     75:        } else if (pc >= 0xFA0000 && pc < 0xFC0000) {
                     76:                /* ROM, put it after RAM data */
                     77:                pc = pc - 0xFA0000 + STRamEnd;
                     78: 
                     79:        } else {
                     80:                /* if in RAM, use as-is */
                     81:                if (unlikely(pc >= STRamEnd)) {
                     82:                        fprintf(stderr, "WARNING: 'invalid' CPU PC profile instruction address 0x%x, skipping!\n", pc);
                     83:                        /* extra entry at end reserved for invalid PC values */
                     84:                        pc = STRamEnd + 0x20000 + TosSize;
                     85:                }
                     86:        }
                     87:        /* CPU instructions are at even addresses, save space by halving */
                     88:        return (pc >> 1);
                     89: }
                     90: 
                     91: 
                     92: /**
                     93:  * Get CPU cycles & count for given address.
                     94:  * Return true if data was available and non-zero, false otherwise.
                     95:  */
                     96: bool Profile_CpuAddressData(Uint32 addr, Uint32 *count, Uint32 *cycles)
                     97: {
                     98:        Uint32 idx;
                     99:        if (!cpu_profile.data) {
                    100:                return false;
                    101:        }
                    102:        idx = address2index(addr);
                    103:        *cycles = cpu_profile.data[idx].cycles;
                    104:        *count = cpu_profile.data[idx].count;
                    105:        return (*count > 0);
                    106: }
                    107: 
                    108: 
                    109: /**
                    110:  * convert sorting array profile data index to Atari memory address.
                    111:  */
                    112: static Uint32 index2address(Uint32 idx)
                    113: {
                    114:        idx <<= 1;
                    115:        /* RAM */
                    116:        if (idx < STRamEnd) {
                    117:                return idx;
                    118:        }
                    119:        /* ROM */
                    120:        idx -= STRamEnd;
                    121:        if (idx < 0x20000) {
                    122:                return idx + 0xFA0000;
                    123:        }
                    124:        /* TOS */
                    125:        return idx - 0x20000 + TosAddress;
                    126: }
                    127: 
                    128: 
                    129: /**
                    130:  * Helper to show statistics for specified CPU profile area.
                    131:  */
                    132: static void show_cpu_area_stats(profile_area_t *area)
                    133: {
                    134:        if (!area->active) {
                    135:                fprintf(stderr, "- no activity\n");
                    136:                return;
                    137:        }
                    138:        fprintf(stderr, "- active address range:\n  0x%06x-0x%06x\n",
                    139:                index2address(area->lowest),
                    140:                index2address(area->highest));
                    141:        fprintf(stderr, "- active instruction addresses:\n  %d (%.2f%% of all)\n",
                    142:                area->active,
                    143:                (float)area->active/cpu_profile.active*100);
                    144:        fprintf(stderr, "- executed instructions:\n  %llu (%.2f%% of all)\n",
                    145:                area->all_count,
                    146:                (float)area->all_count/cpu_profile.all_count*100);
                    147:        fprintf(stderr, "- used cycles:\n  %llu (%.2f%% of all)\n",
                    148:                area->all_cycles,
                    149:                (float)area->all_cycles/cpu_profile.all_cycles*100);
                    150:        fprintf(stderr, "- address with most cycles:\n  0x%06x, %d cycles (%.2f%% of all in area)\n",
                    151:                index2address(area->max_cycles_addr),
                    152:                area->max_cycles,
                    153:                (float)area->max_cycles/area->all_cycles*100);
                    154:        fprintf(stderr, "- address with most hits:\n  0x%06x, %d hits (%.2f%% of all in area)\n",
                    155:                index2address(area->max_count_addr),
                    156:                area->max_count,
                    157:                (float)area->max_count/area->all_count*100);
                    158:        if (area->max_cycles == MAX_PROFILE_VALUE) {
                    159:                fprintf(stderr, "- Counters OVERFLOW!\n");
                    160:        }
                    161: }
                    162: 
                    163: 
                    164: /**
                    165:  * show CPU area (RAM, ROM, TOS) specific statistics.
                    166:  */
                    167: void Profile_CpuShowStats(void)
                    168: {
                    169:        fprintf(stderr, "Normal RAM (0-0x%X):\n", STRamEnd);
                    170:        show_cpu_area_stats(&cpu_profile.ram);
                    171: 
                    172:        fprintf(stderr, "Cartridge ROM (0xFA0000-0xFC0000):\n");
                    173:        show_cpu_area_stats(&cpu_profile.rom);
                    174: 
                    175:        fprintf(stderr, "ROM TOS (0x%X-0x%X):\n", TosAddress, TosAddress+TosSize);
                    176:        show_cpu_area_stats(&cpu_profile.tos);
                    177: }
                    178: 
                    179: 
                    180: /**
                    181:  * compare function for qsort() to sort CPU profile data by descdending
                    182:  * address cycles counts.
                    183:  */
                    184: static int profile_by_cpu_cycles(const void *p1, const void *p2)
                    185: {
                    186:        Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].cycles;
                    187:        Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].cycles;
                    188:        if (count1 > count2) {
                    189:                return -1;
                    190:        }
                    191:        if (count1 < count2) {
                    192:                return 1;
                    193:        }
                    194:        return 0;
                    195: }
                    196: 
                    197: /**
                    198:  * Sort CPU profile data addresses by cycle counts and show the results.
                    199:  */
                    200: void Profile_CpuShowCycles(unsigned int show)
                    201: {
                    202:        unsigned int active;
                    203:        Uint32 *sort_arr, *end, addr;
                    204:        profile_item_t *data = cpu_profile.data;
                    205:        float percentage;
                    206:        Uint32 count;
                    207: 
                    208:        if (!data) {
                    209:                fprintf(stderr, "ERROR: no CPU profiling data available!\n");
                    210:                return;
                    211:        }
                    212: 
                    213:        active = cpu_profile.active;
                    214:        sort_arr = cpu_profile.sort_arr;
                    215:        qsort(sort_arr, active, sizeof(*sort_arr), profile_by_cpu_cycles);
                    216: 
                    217:        printf("addr:\t\tcycles:\n");
                    218:        show = (show < active ? show : active);
                    219:        for (end = sort_arr + show; sort_arr < end; sort_arr++) {
                    220:                addr = index2address(*sort_arr);
                    221:                count = data[*sort_arr].cycles;
                    222:                percentage = 100.0*count/cpu_profile.all_cycles;
                    223:                printf("0x%06x\t%.2f%%\t%d%s\n", addr, percentage, count,
                    224:                       count == MAX_PROFILE_VALUE ? " (OVERFLOW)" : "");
                    225:        }
                    226:        printf("%d CPU addresses listed.\n", show);
                    227: }
                    228: 
                    229: 
                    230: /**
                    231:  * compare function for qsort() to sort CPU profile data by descdending
                    232:  * address access counts.
                    233:  */
                    234: static int profile_by_cpu_count(const void *p1, const void *p2)
                    235: {
                    236:        Uint32 count1 = cpu_profile.data[*(const Uint32*)p1].count;
                    237:        Uint32 count2 = cpu_profile.data[*(const Uint32*)p2].count;
                    238:        if (count1 > count2) {
                    239:                return -1;
                    240:        }
                    241:        if (count1 < count2) {
                    242:                return 1;
                    243:        }
                    244:        return 0;
                    245: }
                    246: 
                    247: /**
                    248:  * Sort CPU profile data addresses by call counts and show the results.
                    249:  * If symbols are requested and symbols are loaded, show (only) addresses
                    250:  * matching a symbol.
                    251:  */
                    252: void Profile_CpuShowCounts(unsigned int show, bool only_symbols)
                    253: {
                    254:        profile_item_t *data = cpu_profile.data;
                    255:        unsigned int symbols, matched, active;
                    256:        Uint32 *sort_arr, *end, addr;
                    257:        const char *name;
                    258:        float percentage;
                    259:        Uint32 count;
                    260: 
                    261:        if (!data) {
                    262:                fprintf(stderr, "ERROR: no CPU profiling data available!\n");
                    263:                return;
                    264:        }
                    265:        active = cpu_profile.active;
                    266:        show = (show < active ? show : active);
                    267: 
                    268:        sort_arr = cpu_profile.sort_arr;
                    269:        qsort(sort_arr, active, sizeof(*sort_arr), profile_by_cpu_count);
                    270: 
                    271:        if (!only_symbols) {
                    272:                printf("addr:\t\tcount:\n");
                    273:                for (end = sort_arr + show; sort_arr < end; sort_arr++) {
                    274:                        addr = index2address(*sort_arr);
                    275:                        count = data[*sort_arr].count;
                    276:                        percentage = 100.0*count/cpu_profile.all_count;
                    277:                        printf("0x%06x\t%.2f%%\t%d%s\n",
                    278:                               addr, percentage, count,
                    279:                               count == MAX_PROFILE_VALUE ? " (OVERFLOW)" : "");
                    280:                }
                    281:                printf("%d CPU addresses listed.\n", show);
                    282:                return;
                    283:        }
                    284: 
                    285:        symbols = Symbols_CpuCount();
                    286:        if (!symbols) {
                    287:                fprintf(stderr, "ERROR: no CPU symbols loaded!\n");
                    288:                return;
                    289:        }
                    290:        matched = 0;    
                    291: 
                    292:        printf("addr:\t\tcount:\t\tsymbol:\n");
                    293:        for (end = sort_arr + active; sort_arr < end; sort_arr++) {
                    294: 
                    295:                addr = index2address(*sort_arr);
                    296:                name = Symbols_GetByCpuAddress(addr);
                    297:                if (!name) {
                    298:                        continue;
                    299:                }
                    300:                count = data[*sort_arr].count;
                    301:                percentage = 100.0*count/cpu_profile.all_count;
                    302:                printf("0x%06x\t%.2f%%\t%d\t%s%s\n",
                    303:                       addr, percentage, count, name,
                    304:                       count == MAX_PROFILE_VALUE ? " (OVERFLOW)" : "");
                    305: 
                    306:                matched++;
                    307:                if (matched >= show || matched >= symbols) {
                    308:                        break;
                    309:                }
                    310:        }
                    311:        printf("%d CPU symbols listed.\n", matched);
                    312: }
                    313: 
                    314: 
                    315: /* ------------------ CPU profile control ----------------- */
                    316: 
                    317: /**
                    318:  * Initialize CPU profiling when necessary.  Return true if profiling.
                    319:  */
                    320: bool Profile_CpuStart(void)
                    321: {
                    322:        if (cpu_profile.sort_arr) {
                    323:                /* remove previous results */
                    324:                free(cpu_profile.sort_arr);
                    325:                free(cpu_profile.data);
                    326:                cpu_profile.sort_arr = NULL;
                    327:                cpu_profile.data = NULL;
                    328:                printf("Freed previous CPU profile buffers.\n");
                    329:        }
                    330:        if (!cpu_profile.enabled) {
                    331:                return false;
                    332:        }
                    333:        /* Shouldn't change within same debug session */
                    334:        cpu_profile.size = (STRamEnd + 0x20000 + TosSize) / 2;
                    335: 
                    336:        /* Add one entry for catching invalid PC values */
                    337:        cpu_profile.data = calloc(cpu_profile.size+1, sizeof(*cpu_profile.data));
                    338:        if (cpu_profile.data) {
                    339:                printf("Allocated CPU profile buffer (%d MB).\n",
                    340:                       (int)sizeof(*cpu_profile.data)*cpu_profile.size/1024/1024);
                    341:        } else {
                    342:                perror("ERROR, new CPU profile buffer alloc failed");
                    343:                cpu_profile.enabled = false;
                    344:        }
                    345:        return cpu_profile.enabled;
                    346: }
                    347: 
                    348: 
                    349: /**
                    350:  * Update CPU cycle and count statistics for PC address.
                    351:  */
                    352: void Profile_CpuUpdate(void)
                    353: {
                    354:        Uint32 idx, opcode, cycles;
                    355:        
                    356:        idx = address2index(M68000_GetPC());
                    357: 
                    358:        if (likely(cpu_profile.data[idx].count < MAX_PROFILE_VALUE)) {
                    359:                cpu_profile.data[idx].count++;
                    360:        }
                    361:        
                    362:        opcode = get_iword_prefetch (0);
                    363:        cycles = (*cpufunctbl[opcode])(opcode) + nWaitStateCycles;
                    364:        
                    365:        if (likely(cpu_profile.data[idx].cycles < MAX_PROFILE_VALUE - cycles)) {
                    366:                        cpu_profile.data[idx].cycles += cycles;
                    367:        }
                    368: }
                    369: 
                    370: 
                    371: /**
                    372:  * Helper for collecting profile area statistics.
                    373:  */
                    374: static void update_area(Uint32 i, profile_item_t *item, profile_area_t *area)
                    375: {
                    376:        Uint32 cycles, count = item->count;
                    377:        if (!count) {
                    378:                return;
                    379:        }
                    380: 
                    381:        area->all_count += count;
                    382:        if (count > area->max_count) {
                    383:                area->max_count = count;
                    384:                area->max_count_addr = i;
                    385:        }
                    386: 
                    387:        cycles = item->cycles;
                    388:        area->all_cycles += cycles;
                    389:        if (cycles > area->max_cycles) {
                    390:                area->max_cycles = cycles;
                    391:                area->max_cycles_addr = i;
                    392:        }
                    393: 
                    394:        if (i < area->lowest) {
                    395:                area->lowest = i;
                    396:        }
                    397:        area->highest = i;
                    398: 
                    399:        area->active++;
                    400: }
                    401: 
                    402: 
                    403: /**
                    404:  * Stop and process the CPU profiling data; collect stats and
                    405:  * prepare for more optimal sorting.
                    406:  */
                    407: void Profile_CpuStop(void)
                    408: {
                    409:        profile_item_t *item;
                    410:        profile_area_t *area;
                    411:        Uint32 *sort_arr;
                    412:        Uint32 i, active;
                    413: 
                    414:        if (!cpu_profile.enabled) {
                    415:                return;
                    416:        }
                    417:        /* user didn't change RAM or TOS size in the meanwhile? */
                    418:        assert(cpu_profile.size == (STRamEnd + 0x20000 + TosSize) / 2);
                    419: 
                    420:        /* find lowest and highest addresses executed... */
                    421:        item = cpu_profile.data;
                    422: 
                    423:        /* ...for normal RAM */
                    424:        area = &cpu_profile.ram;
                    425:        memset(area, 0, sizeof(profile_area_t));
                    426:        area->lowest = cpu_profile.size;
                    427: 
                    428:        for (i = 0; i < STRamEnd/2; i++, item++) {
                    429:                update_area(i, item, area);
                    430:        }
                    431: 
                    432:        /* ... for Cartridge ROM */
                    433:        area = &cpu_profile.rom;
                    434:        memset(area, 0, sizeof(profile_area_t));
                    435:        area->lowest = cpu_profile.size;
                    436: 
                    437:        for (; i < (STRamEnd + 0x20000)/2; i++, item++) {
                    438:                update_area(i, item, area);
                    439:        }
                    440: 
                    441:        /* ...for ROM TOS */
                    442:        area = &cpu_profile.tos;
                    443:        memset(area, 0, sizeof(profile_area_t));
                    444:        area->lowest = cpu_profile.size;
                    445: 
                    446:        for (; i < cpu_profile.size; i++, item++) {
                    447:                update_area(i, item, area);
                    448:        }
                    449: 
                    450:        cpu_profile.all_cycles = cpu_profile.ram.all_cycles + cpu_profile.rom.all_cycles + cpu_profile.tos.all_cycles;
                    451:        cpu_profile.all_count = cpu_profile.ram.all_count + cpu_profile.rom.all_count + cpu_profile.tos.all_count;
                    452: 
                    453:        /* allocate address array for sorting */
                    454:        active = cpu_profile.ram.active + cpu_profile.rom.active + cpu_profile.tos.active;
                    455:        sort_arr = calloc(active, sizeof(*sort_arr));
                    456: 
                    457:        if (!sort_arr) {
                    458:                perror("ERROR: allocating CPU profile address data");
                    459:                free(cpu_profile.data);
                    460:                cpu_profile.data = NULL;
                    461:                return;
                    462:        }
                    463:        printf("Allocated CPU profile address buffer (%d KB).\n",
                    464:               (int)sizeof(*sort_arr)*(active+512)/1024);
                    465:        cpu_profile.sort_arr = sort_arr;
                    466:        cpu_profile.active = active;
                    467: 
                    468:        /* and fill addresses for used instructions... */
                    469:        
                    470:        /* ...for normal RAM */
                    471:        area = &cpu_profile.ram;
                    472:        item = cpu_profile.data + area->lowest;
                    473:        for (i = area->lowest; i <= area->highest; i++, item++) {
                    474:                if (item->count) {
                    475:                        *sort_arr++ = i;
                    476:                }
                    477:        }
                    478: 
                    479:        /* ...for Cartridge ROM */
                    480:        area = &cpu_profile.rom;
                    481:        item = cpu_profile.data + area->lowest;
                    482:        for (i = area->lowest; i <= area->highest; i++, item++) {
                    483:                if (item->count) {
                    484:                        *sort_arr++ = i;
                    485:                }
                    486:        }
                    487: 
                    488:        /* ...for TOS ROM */
                    489:        area = &cpu_profile.tos;
                    490:        item = cpu_profile.data + area->lowest;
                    491:        for (i = area->lowest; i <= area->highest; i++, item++) {
                    492:                if (item->count) {
                    493:                        *sort_arr++ = i;
                    494:                }
                    495:        }
                    496:        //printf("%d/%d/%d\n", area->active, sort_arr-cpu_profile.sort_arr, active);
                    497: 
                    498:        Profile_CpuShowStats();
                    499:        return;
                    500: }
                    501: 
                    502: 
                    503: /* ------------------ DSP profile results ----------------- */
                    504: 
                    505: /**
                    506:  * Get DSP cycles & count for given address.
                    507:  * Return true if data was available and non-zero, false otherwise.
                    508:  */
                    509: bool Profile_DspAddressData(Uint16 addr, Uint32 *count, Uint32 *cycles)
                    510: {
                    511:        if (!dsp_profile.data) {
                    512:                return false;
                    513:        }
                    514:        *cycles = dsp_profile.data[addr].cycles;
                    515:        *count = dsp_profile.data[addr].count;
                    516:        return (*count > 0);
                    517: }
                    518: 
                    519: /**
                    520:  * show DSP specific profile statistics.
                    521:  */
                    522: void Profile_DspShowStats(void)
                    523: {
                    524:        profile_area_t *area = &dsp_profile.ram;
                    525:        fprintf(stderr, "DSP profile statistics (0x0-0xFFFF):\n");
                    526:        if (!area->active) {
                    527:                fprintf(stderr, "- no activity\n");
                    528:                return;
                    529:        }
                    530:        fprintf(stderr, "- active address range:\n  0x%04x-0x%04x\n",
                    531:                area->lowest, area->highest);
                    532:        fprintf(stderr, "- active instruction addresses:\n  %d\n",
                    533:                area->active);
                    534:        fprintf(stderr, "- executed instructions:\n  %llu\n",
                    535:                area->all_count);
                    536:        fprintf(stderr, "- used cycles:\n  %llu\n",
                    537:                area->all_cycles);
                    538:        fprintf(stderr, "- address with most cycles:\n  0x%04x, %d cycles (%.2f%% of all)\n",
                    539:                area->max_cycles_addr,
                    540:                area->max_cycles,
                    541:                (float)area->max_cycles/area->all_cycles*100);
                    542:        fprintf(stderr, "- address with most hits:\n  0x%04x, %d hits (%.2f%% of all)\n",
                    543:                area->max_count_addr,
                    544:                area->max_count,
                    545:                (float)area->max_count/area->all_count*100);
                    546:        if (area->max_cycles == MAX_PROFILE_VALUE) {
                    547:                fprintf(stderr, "- Counters OVERFLOW!\n");
                    548:        }
                    549: }
                    550: 
                    551: 
                    552: /**
                    553:  * compare function for qsort() to sort DSP profile data by descdending
                    554:  * address cycles counts.
                    555:  */
                    556: static int profile_by_dsp_cycles(const void *p1, const void *p2)
                    557: {
                    558:        Uint32 count1 = dsp_profile.data[*(const Uint16*)p1].cycles;
                    559:        Uint32 count2 = dsp_profile.data[*(const Uint16*)p2].cycles;
                    560:        if (count1 > count2) {
                    561:                return -1;
                    562:        }
                    563:        if (count1 < count2) {
                    564:                return 1;
                    565:        }
                    566:        return 0;
                    567: }
                    568: 
                    569: /**
                    570:  * Sort DSP profile data addresses by cycle counts and show the results.
                    571:  */
                    572: void Profile_DspShowCycles(unsigned int show)
                    573: {
                    574:        unsigned int active;
                    575:        Uint16 *sort_arr, *end, addr;
                    576:        profile_item_t *data = dsp_profile.data;
                    577:        float percentage;
                    578:        Uint32 count;
                    579: 
                    580:        if (!data) {
                    581:                fprintf(stderr, "ERROR: no DSP profiling data available!\n");
                    582:                return;
                    583:        }
                    584: 
                    585:        active = dsp_profile.ram.active;
                    586:        sort_arr = dsp_profile.sort_arr;
                    587:        qsort(sort_arr, active, sizeof(*sort_arr), profile_by_dsp_cycles);
                    588: 
                    589:        printf("addr:\tcycles:\n");
                    590:        show = (show < active ? show : active);
                    591:        for (end = sort_arr + show; sort_arr < end; sort_arr++) {
                    592:                addr = *sort_arr;
                    593:                count = data[addr].cycles;
                    594:                percentage = 100.0*count/dsp_profile.ram.all_cycles;
                    595:                printf("0x%04x\t%.2f%%\t%d%s\n", addr, percentage, count,
                    596:                       count == MAX_PROFILE_VALUE ? " (OVERFLOW)" : "");
                    597:        }
                    598:        printf("%d DSP addresses listed.\n", show);
                    599: }
                    600: 
                    601: 
                    602: /**
                    603:  * compare function for qsort() to sort DSP profile data by descdending
                    604:  * address access counts.
                    605:  */
                    606: static int profile_by_dsp_count(const void *p1, const void *p2)
                    607: {
                    608:        Uint32 count1 = dsp_profile.data[*(const Uint16*)p1].count;
                    609:        Uint32 count2 = dsp_profile.data[*(const Uint16*)p2].count;
                    610:        if (count1 > count2) {
                    611:                return -1;
                    612:        }
                    613:        if (count1 < count2) {
                    614:                return 1;
                    615:        }
                    616:        return 0;
                    617: }
                    618: 
                    619: /**
                    620:  * Sort DSP profile data addresses by call counts and show the results.
                    621:  * If symbols are requested and symbols are loaded, show (only) addresses
                    622:  * matching a symbol.
                    623:  */
                    624: void Profile_DspShowCounts(unsigned int show, bool only_symbols)
                    625: {
                    626:        profile_item_t *data = dsp_profile.data;
                    627:        unsigned int symbols, matched, active;
                    628:        Uint16 *sort_arr, *end, addr;
                    629:        const char *name;
                    630:        float percentage;
                    631:        Uint32 count;
                    632: 
                    633:        if (!data) {
                    634:                fprintf(stderr, "ERROR: no DSP profiling data available!\n");
                    635:                return;
                    636:        }
                    637:        active = dsp_profile.ram.active;
                    638:        show = (show < active ? show : active);
                    639: 
                    640:        sort_arr = dsp_profile.sort_arr;
                    641:        qsort(sort_arr, active, sizeof(*sort_arr), profile_by_dsp_count);
                    642: 
                    643:        if (!only_symbols) {
                    644:                printf("addr:\tcount:\n");
                    645:                for (end = sort_arr + show; sort_arr < end; sort_arr++) {
                    646:                        addr = *sort_arr;
                    647:                        count = data[addr].count;
                    648:                        percentage = 100.0*count/dsp_profile.ram.all_count;
                    649:                        printf("0x%04x\t%.2f%%\t%d%s\n",
                    650:                               addr, percentage, count,
                    651:                               count == MAX_PROFILE_VALUE ? " (OVERFLOW)" : "");
                    652:                }
                    653:                printf("%d DSP addresses listed.\n", show);
                    654:                return;
                    655:        }
                    656: 
                    657:        symbols = Symbols_DspCount();
                    658:        if (!symbols) {
                    659:                fprintf(stderr, "ERROR: no DSP symbols loaded!\n");
                    660:                return;
                    661:        }
                    662:        matched = 0;    
                    663: 
                    664:        printf("addr:\tcount:\t\tsymbol:\n");
                    665:        for (end = sort_arr + active; sort_arr < end; sort_arr++) {
                    666: 
                    667:                addr = *sort_arr;
                    668:                name = Symbols_GetByDspAddress(addr);
                    669:                if (!name) {
                    670:                        continue;
                    671:                }
                    672:                count = data[addr].count;
                    673:                percentage = 100.0*count/dsp_profile.ram.all_count;
                    674:                printf("0x%04x\t%.2f%%\t%d\t%s%s\n",
                    675:                       addr, percentage, count, name,
                    676:                       count == MAX_PROFILE_VALUE ? " (OVERFLOW)" : "");
                    677: 
                    678:                matched++;
                    679:                if (matched >= show || matched >= symbols) {
                    680:                        break;
                    681:                }
                    682:        }
                    683:        printf("%d DSP symbols listed.\n", matched);
                    684: }
                    685: 
                    686: 
                    687: /* ------------------ DSP profile control ----------------- */
                    688: 
                    689: /**
                    690:  * Initialize DSP profiling when necessary.  Return true if profiling.
                    691:  */
                    692: bool Profile_DspStart(void)
                    693: {
                    694:        if (dsp_profile.sort_arr) {
                    695:                /* remove previous results */
                    696:                free(dsp_profile.sort_arr);
                    697:                free(dsp_profile.data);
                    698:                dsp_profile.sort_arr = NULL;
                    699:                dsp_profile.data = NULL;
                    700:                printf("Freed previous DSP profile buffers.\n");
                    701:        }
                    702:        if (!dsp_profile.enabled) {
                    703:                return false;
                    704:        }
                    705: 
                    706:        dsp_profile.data = calloc(DSP_PROFILE_ARR_SIZE, sizeof(*dsp_profile.data));
                    707:        if (dsp_profile.data) {
                    708:                printf("Allocated DSP profile buffer (%d KB).\n",
                    709:                       (int)sizeof(*dsp_profile.data)*DSP_PROFILE_ARR_SIZE/1024);
                    710:        } else {
                    711:                perror("ERROR, new DSP profile buffer alloc failed");
                    712:                dsp_profile.enabled = false;
                    713:        }
                    714:        return dsp_profile.enabled;
                    715: }
                    716: 
                    717: /**
                    718:  * Update DSP cycle and count statistics for PC address.
                    719:  */
                    720: void Profile_DspUpdate(void)
                    721: {
                    722:        Uint16 pc, cycles;
                    723: 
                    724:        pc = DSP_GetPC();
                    725:        if (likely(dsp_profile.data[pc].count < MAX_PROFILE_VALUE)) {
                    726:                dsp_profile.data[pc].count++;
                    727:        }
                    728: 
                    729:        cycles = DSP_GetInstrCycles();
                    730:        if (likely(dsp_profile.data[pc].cycles < MAX_PROFILE_VALUE - cycles)) {
                    731:                dsp_profile.data[pc].cycles += cycles;
                    732:        }
                    733: }
                    734: 
                    735: 
                    736: /**
                    737:  * Stop and process the DSP profiling data; collect stats and
                    738:  * prepare for more optimal sorting.
                    739:  */
                    740: void Profile_DspStop(void)
                    741: {
                    742:        profile_item_t *item;
                    743:        profile_area_t *area;
                    744:        Uint16 *sort_arr;
                    745:        Uint32 i;
                    746: 
                    747:        if (!dsp_profile.enabled) {
                    748:                return;
                    749:        }
                    750:        /* find lowest and highest  addresses executed */
                    751:        item = dsp_profile.data;
                    752:        area = &dsp_profile.ram;
                    753:        memset(area, 0, sizeof(profile_area_t));
                    754:        area->lowest = DSP_PROFILE_ARR_SIZE;
                    755: 
                    756:        for (i = 0; i < DSP_PROFILE_ARR_SIZE; i++, item++) {
                    757:                update_area(i, item, area);
                    758:        }
                    759: 
                    760:        /* allocate address array for sorting */
                    761:        sort_arr = calloc(dsp_profile.ram.active, sizeof(*sort_arr));
                    762: 
                    763:        if (!sort_arr) {
                    764:                perror("ERROR: allocating DSP profile address data");
                    765:                free(dsp_profile.data);
                    766:                dsp_profile.data = NULL;
                    767:                return;
                    768:        }
                    769:        printf("Allocated DSP profile address buffer (%d KB).\n",
                    770:               (int)sizeof(*sort_arr)*(dsp_profile.ram.active+512)/1024);
                    771:        dsp_profile.sort_arr = sort_arr;
                    772: 
                    773:        /* ...and fill addresses for used instructions... */
                    774:        area = &dsp_profile.ram;
                    775:        item = dsp_profile.data + area->lowest;
                    776:        for (i = area->lowest; i <= area->highest; i++, item++) {
                    777:                if (item->count) {
                    778:                        *sort_arr++ = i;
                    779:                }
                    780:        }
                    781:        //printf("%d/%d/%d\n", area->active, sort_arr-dsp_profile.sort_arr, active);
                    782: 
                    783:        Profile_DspShowStats();
                    784:        return;
                    785: }
                    786: 
                    787: 
                    788: /* ------------------- command parsing ---------------------- */
                    789: 
                    790: /**
                    791:  * Readline match callback to list profile subcommand names.
                    792:  * STATE = 0 -> different text from previous one.
                    793:  * Return next match or NULL if no matches.
                    794:  */
                    795: char *Profile_Match(const char *text, int state)
                    796: {
                    797:        static const char *names[] = {
                    798:                "on", "off", "counts", "cycles", "symbols", "stats"
                    799:        };
                    800:        static int i, len;
                    801:        
                    802:        if (!state)
                    803:        {
                    804:                /* first match */
                    805:                i = 0;
                    806:                len = strlen(text);
                    807:        }
                    808:        /* next match */
                    809:        while (i < ARRAYSIZE(names)) {
                    810:                if (strncasecmp(names[i++], text, len) == 0)
                    811:                        return (strdup(names[i-1]));
                    812:        }
                    813:        return NULL;
                    814: }
                    815: 
/* Usage synopsis (first line) and help text for the profile command. */
const char Profile_Description[] =
	  "<on|off|counts|cycles|symbols|stats> [show count]\n"
	  "\ton & off enable and disable profiling.  Data is collected\n"
	  "\tuntil debugger is entered again after which you can view\n"
	  "\tstatistics about the data or view PC addresses that took\n"
	  "\tmost cycles or functions/symbols called most often.\n"
	  "\tYou can specify how many items are shown at most.";
                    823: 
                    824: 
                    825: /**
                    826:  * Command: CPU/DSP profiling enabling, exec stats, cycle and call stats.
                    827:  * Return for succesful command and false for incorrect ones.
                    828:  */
                    829: bool Profile_Command(int nArgc, char *psArgs[], bool bForDsp)
                    830: {
                    831:        static int show = 16;
                    832:        bool *enabled;
                    833:        
                    834:        if (nArgc < 2) {
                    835:                DebugUI_PrintCmdHelp(psArgs[0]);
                    836:                return true;
                    837:        }
                    838:        if (nArgc > 2) {
                    839:                show = atoi(psArgs[2]);
                    840:        }
                    841:        
                    842:        if (bForDsp) {
                    843:                enabled = &dsp_profile.enabled;
                    844:        } else {
                    845:                enabled = &cpu_profile.enabled;
                    846:        }
                    847:        if (strcmp(psArgs[1], "on") == 0) {
                    848:                *enabled = true;
                    849:                fprintf(stderr, "Profiling enabled.\n");
                    850:                return true;
                    851:        }
                    852:        if (strcmp(psArgs[1], "off") == 0) {
                    853:                *enabled = false;
                    854:                fprintf(stderr, "Profiling disabled.\n");
                    855:                return true;
                    856:        }
                    857:        
                    858:        if (strcmp(psArgs[1], "stats") == 0) {
                    859:                if (bForDsp) {
                    860:                        Profile_DspShowStats();
                    861:                } else {
                    862:                        Profile_CpuShowStats();
                    863:                }
                    864:        } else if (strcmp(psArgs[1], "cycles") == 0) {
                    865:                if (bForDsp) {
                    866:                        Profile_DspShowCycles(show);
                    867:                } else {
                    868:                        Profile_CpuShowCycles(show);
                    869:                }
                    870:        } else if (strcmp(psArgs[1], "counts") == 0) {
                    871:                if (bForDsp) {
                    872:                        Profile_DspShowCounts(show, false);
                    873:                } else {
                    874:                        Profile_CpuShowCounts(show, false);
                    875:                }
                    876:        } else if (strcmp(psArgs[1], "symbols") == 0)   {
                    877:                if (bForDsp) {
                    878:                        Profile_DspShowCounts(show, true);
                    879:                } else {
                    880:                        Profile_CpuShowCounts(show, true);
                    881:                }
                    882:        } else {
                    883:                DebugUI_PrintCmdHelp(psArgs[0]);
                    884:                return false;
                    885:        }
                    886:        return true;
                    887: }

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.