--- hatari/src/debug/symbols.c 2019/04/09 08:55:34 1.1.1.7 +++ hatari/src/debug/symbols.c 2019/04/09 08:58:03 1.1.1.9 @@ -1,7 +1,7 @@ /* * Hatari - symbols.c * - * Copyright (C) 2010-2015 by Eero Tamminen + * Copyright (C) 2010-2017 by Eero Tamminen * * This file is distributed under the GNU General Public License, version 2 * or at your option any later version. Read the file gpl.txt for details. @@ -10,7 +10,7 @@ * matching, TAB completion support etc. * * Symbol/address information is read either from: - * - A program file's DRI/GST format symbol table, or + * - A program file's DRI/GST or a.out format symbol table, or * - ASCII file which contents are subset of "nm" output i.e. composed of * a hexadecimal addresses followed by a space, letter indicating symbol * type (T = text/code, D = data, B = BSS), space and the symbol name. @@ -34,6 +34,7 @@ const char Symbols_fileid[] = "Hatari sy #include "debugInfo.h" #include "evaluate.h" #include "configuration.h" +#include "a.out.h" typedef struct { char *name; @@ -42,10 +43,13 @@ typedef struct { } symbol_t; typedef struct { - int count; /* final symbol count */ int symbols; /* initial symbol count */ - symbol_t *addresses; /* items sorted by address */ - symbol_t *names; /* items sorted by symbol name */ + int namecount; /* final symbol count */ + int codecount; /* TEXT symbol address count */ + int datacount; /* DATA/BSS symbol address count */ + symbol_t *addresses; /* TEXT + DATA/BSS items sorted by address */ + symbol_t *names; /* all items sorted by symbol name */ + char *strtab; } symbol_list_t; typedef struct { @@ -59,6 +63,11 @@ typedef struct { */ #define MAX_SYM_SIZE 32 +/* Magic used to denote different symbol table formats */ +#define SYMBOL_FORMAT_GNU 0x474E555f /* "GNU_" */ +#define SYMBOL_FORMAT_MINT 0x4D694E54 /* "MiNT" */ +#define SYMBOL_FORMAT_DRI 0x0 + /* TODO: add symbol name/address file names to configuration? 
*/ static symbol_list_t *CpuSymbolsList; @@ -66,48 +75,133 @@ static symbol_list_t *DspSymbolsList; /* path for last loaded program (through GEMDOS HD emulation) */ static char *CurrentProgramPath; +/* whether current symbols were loaded from a program file */ static bool SymbolsAreForProgram; +/* prevent repeated failing on every debugger invocation */ static bool AutoLoadFailed; /* ------------------ load and free functions ------------------ */ /** - * compare function for qsort() to sort according to symbol address + * return true if given symbol name is object/library/file name + */ +static bool is_obj_file(const char *name) +{ + int len = strlen(name); + /* object (.a or .o) / file name? */ + if (len > 2 && ((name[len-2] == '.' && (name[len-1] == 'a' || name[len-1] == 'o')) || strchr(name, '/'))) { + return true; + } + return false; +} + +/** + * compare function for qsort() to sort according to + * symbol type & address. Text section symbols will + * be sorted first. */ static int symbols_by_address(const void *s1, const void *s2) { - Uint32 addr1 = ((const symbol_t*)s1)->address; - Uint32 addr2 = ((const symbol_t*)s2)->address; + const symbol_t *sym1 = (const symbol_t*)s1; + const symbol_t *sym2 = (const symbol_t*)s2; - if (addr1 < addr2) { + /* separate TEXT type addresses from others */ + if (sym1->type != sym2->type) { + if (sym1->type == SYMTYPE_TEXT) { + return -1; + } + if (sym2->type == SYMTYPE_TEXT) { + return 1; + } + } + /* then sort by address */ + if (sym1->address < sym2->address) { return -1; } - if (addr1 > addr2) { + if (sym1->address > sym2->address) { return 1; } - fprintf(stderr, "WARNING: symbols '%s' & '%s' have the same 0x%x address.\n", - ((const symbol_t*)s1)->name, ((const symbol_t*)s2)->name, addr1); return 0; } /** - * compare function for qsort() to sort according to symbol name + * compare function for qsort() to sort according to + * symbol name & address */ static int symbols_by_name(const void *s1, const void *s2) { - const 
char* name1 = ((const symbol_t*)s1)->name; - const char* name2 = ((const symbol_t*)s2)->name; + const symbol_t *sym1 = (const symbol_t*)s1; + const symbol_t *sym2 = (const symbol_t*)s2; int ret; - ret = strcmp(name1, name2); - if (!ret) { - fprintf(stderr, "WARNING: addresses 0x%x & 0x%x have the same '%s' name.\n", - ((const symbol_t*)s1)->address, ((const symbol_t*)s2)->address, name1); + /* first by name */ + ret = strcmp(sym1->name, sym2->name); + if (ret) { + return ret; } - return ret; + /* then by address (compare, don't subtract: an unsigned difference wraps and gives the wrong sign as int) */ + return (sym1->address > sym2->address) - (sym1->address < sym2->address); } +/** + * Check for duplicate addresses in symbol list + * (called separately for TEXT & non-TEXT symbols) + * Return number of duplicates + */ +static int symbols_check_addresses(const symbol_t *syms, int count) +{ + int i, j, dups = 0; + + for (i = 0; i < (count - 1); i++) + { + /* absolute symbols have values, not addresses */ + if (syms[i].type == SYMTYPE_ABS) { + continue; + } + for (j = i + 1; j < count && syms[i].address == syms[j].address; j++) { + if (syms[j].type == SYMTYPE_ABS) { + continue; + } + /* ASCII symbol files contain also object file addresses, + * those will often have the same address as the first symbol + * in given object -> no point warning about them + */ + if (is_obj_file(syms[i].name) || is_obj_file(syms[j].name)) { + continue; + } + fprintf(stderr, "WARNING: symbols '%s' & '%s' have the same 0x%x address\n", + syms[i].name, syms[j].name, syms[i].address); + dups++; + i = j; + } + } + return dups; +} + +/** + * Check for duplicate names in symbol list + * Return number of duplicates + */ +static int symbols_check_names(const symbol_t *syms, int count) +{ + int i, j, dups = 0; + + for (i = 0; i < (count - 1); i++) + { + for (j = i + 1; j < count && strcmp(syms[i].name, syms[j].name) == 0; j++) { + /* this is common case for object files having different sections */ + if (syms[i].type != syms[j].type && is_obj_file(syms[i].name)) { + continue; + } + fprintf(stderr, "WARNING: addresses 
0x%x & 0x%x have the same '%s' name\n", + syms[i].address, syms[j].address, syms[i].name); + dups++; + i = j; + } + } + return dups; +} /** * Allocate symbol list & names for given number of items. @@ -153,11 +247,35 @@ static char symbol_char(int type) case SYMTYPE_TEXT: return 'T'; case SYMTYPE_DATA: return 'D'; case SYMTYPE_BSS: return 'B'; + case SYMTYPE_ABS: return 'A'; default: return '?'; } } -#define INVALID_SYMBOL_OFFSETS ((symbol_list_t*)1) +/** + * Return true if symbol name matches internal GCC symbol name, + * or is object / file name. + */ +static bool symbol_remove_obj(const char *name) +{ + static const char *gcc_sym[] = { + "___gnu_compiled_c", + "gcc2_compiled." + }; + int i; + + if (is_obj_file(name)) { + return true; + } + /* useless symbols GCC (v2) seems to add to every object? */ + for (i = 0; i < ARRAY_SIZE(gcc_sym); i++) { + if (strcmp(name, gcc_sym[i]) == 0) { + return true; + } + } + return false; +} + /** * Load symbols of given type and the symbol address addresses from @@ -167,8 +285,8 @@ static char symbol_char(int type) */ static symbol_list_t* symbols_load_dri(FILE *fp, prg_section_t *sections, symtype_t gettype, Uint32 tablesize) { - int i, count, symbols, len, outside; - int dtypes, locals, ofiles; + int i, count, symbols, invalid; + int notypes, dtypes, locals, ofiles; prg_section_t *section; symbol_list_t *list; symtype_t symtype; @@ -186,7 +304,7 @@ static symbol_list_t* symbols_load_dri(F return NULL; } - outside = dtypes = ofiles = locals = count = 0; + invalid = dtypes = notypes = ofiles = locals = count = 0; for (i = 1; i <= symbols; i++) { /* read DRI symbol table slot */ if (fread(name, 8, 1, fp) != 1 || @@ -228,33 +346,35 @@ static symbol_list_t* symbols_load_dri(F dtypes++; continue; } + if ((symid & 0x4000) == 0x4000) { + symtype = SYMTYPE_ABS; + section = NULL; + break; + } fprintf(stderr, "WARNING: ignoring symbol '%s' in slot %d of unknown type 0x%x.\n", name, i, symid); + invalid++; continue; } if (!(gettype & 
symtype)) { + notypes++; continue; } if (name[0] == '.' && name[1] == 'L') { locals++; continue; } - len = strlen(name); - if (strchr(name, '/') || (len > 2 && name[len-2] == '.' && name[len-1] == 'o')) { + if (symbol_remove_obj(name)) { ofiles++; continue; } - address += section->offset; - if (address > section->end) { - /* VBCC has 1 symbol outside of its section */ - if (++outside > 2) { - /* potentially buggy version of VBCC vlink used */ - fprintf(stderr, "ERROR: too many invalid offsets, skipping rest of symbols!\n"); - symbol_list_free(list); - return INVALID_SYMBOL_OFFSETS; + if (section) { + address += section->offset; + if (address > section->end) { + fprintf(stderr, "WARNING: ignoring symbol '%s' of type %c in slot %d with invalid offset 0x%x (>= 0x%x).\n", + name, symbol_char(symtype), i, address, section->end); + invalid++; + continue; } - fprintf(stderr, "WARNING: ignoring symbol '%s' of %c type in slot %d with invalid offset 0x%x (>= 0x%x).\n", - name, symbol_char(symtype), i, address, section->end); - continue; } list->names[count].address = address; list->names[count].type = symtype; @@ -267,8 +387,22 @@ static symbol_list_t* symbols_load_dri(F symbol_list_free(list); return NULL; } + list->symbols = symbols; + list->namecount = count; + + /* skip verbose output when symbol loading is forced */ + if (ConfigureParams.Debugger.bSymbolsResident) { + return list; + } + + if (invalid) { + fprintf(stderr, "NOTE: ignored %d invalid symbols.\n", invalid); + } if (dtypes) { - fprintf(stderr, "NOTE: ignored %d globally defined equated values.\n", dtypes); + fprintf(stderr, "NOTE: ignored %d debugging symbols.\n", dtypes); + } + if (notypes) { + fprintf(stderr, "NOTE: ignored %d other unwanted symbol types.\n", notypes); } if (locals) { fprintf(stderr, "NOTE: ignored %d unnamed / local symbols (= name starts with '.L').\n", locals); @@ -279,10 +413,210 @@ static symbol_list_t* symbols_load_dri(F * addition to object file addresses conflicting with * first 
symbol in the object file. */ - fprintf(stderr, "NOTE: ignored %d object file names (= name has '/' or ends in '.o').\n", ofiles); + fprintf(stderr, "NOTE: ignored %d object symbols (= name has '/', ends in '.[ao]' or is GCC internal).\n", ofiles); + } + return list; +} + + +/** + * Load symbols of given type and the symbol address addresses from + * a.out format symbol table, and add given offsets to the addresses: + * Return symbols list or NULL for failure. + */ +static symbol_list_t* symbols_load_gnu(FILE *fp, prg_section_t *sections, symtype_t gettype, Uint32 tablesize, Uint32 stroff, Uint32 strsize) +{ + size_t slots = tablesize / SIZEOF_STRUCT_NLIST; + size_t i; + size_t strx; + unsigned char *p; + char *name; + symbol_t *sym; + symtype_t symtype; + uint32_t address; + uint32_t nread; + symbol_list_t *list; + unsigned char n_type; + unsigned char n_other; + unsigned short n_desc; + static char dummy[] = ""; + int dtypes, locals, ofiles, count, notypes, invalid, weak; + prg_section_t *section; + + if (!(list = symbol_list_alloc(slots))) { + return NULL; + } + + list->strtab = (char *)malloc(tablesize + strsize); + + if (list->strtab == NULL) + { + symbol_list_free(list); + return NULL; + } + + nread = fread(list->strtab, tablesize + strsize, 1, fp); + if (nread != 1) + { + perror("ERROR: reading symbols failed"); + symbol_list_free(list); + return NULL; + } + + p = (unsigned char *)list->strtab; + sym = list->names; + + weak = invalid = dtypes = notypes = ofiles = locals = count = 0; + for (i = 0; i < slots; i++) + { + strx = SDL_SwapBE32(*(Uint32*)p); + p += 4; + n_type = *p++; + n_other = *p++; + n_desc = SDL_SwapBE16(*(Uint16*)p); + p += 2; + address = SDL_SwapBE32(*(Uint32*)p); + p += 4; + name = dummy; + if (!strx) { + invalid++; + continue; + } + if (strx >= strsize) { + fprintf(stderr, "symbol name index %x out of range\n", (unsigned int)strx); + invalid++; + continue; + } + name = list->strtab + strx + stroff; + + if (n_type & N_STAB) + { + dtypes++; 
+ continue; + } + section = NULL; + switch (n_type & (N_TYPE|N_EXT)) + { + case N_UNDF: + case N_UNDF|N_EXT: + /* shouldn't happen here */ + weak++; + continue; + case N_ABS: + case N_ABS|N_EXT: + symtype = SYMTYPE_ABS; + break; + case N_TEXT: + case N_TEXT|N_EXT: + symtype = SYMTYPE_TEXT; + section = &(sections[0]); + break; + case N_DATA: + case N_DATA|N_EXT: + symtype = SYMTYPE_DATA; + section = &(sections[1]); + break; + case N_BSS: + case N_BSS|N_EXT: + case N_COMM: + case N_COMM|N_EXT: + symtype = SYMTYPE_BSS; + section = &(sections[2]); + break; + case N_FN: /* filenames, not object addresses? */ + dtypes++; + continue; + case N_SIZE: + case N_WARNING: + case N_SETA: + case N_SETT: + case N_SETD: + case N_SETB: + case N_SETV: + dtypes++; + continue; + case N_WEAKU: + case N_WEAKT: + case N_WEAKD: + case N_WEAKB: + weak++; + continue; + default: + fprintf(stderr, "WARNING: ignoring symbol '%s' in slot %u of unknown type 0x%x.\n", name, (unsigned int)i, n_type); + invalid++; + continue; + } + /* + * the value of a common symbol is its size, not its address: + */ + if (((n_type & N_TYPE) == N_COMM) || + (((n_type & N_EXT) && (n_type & N_TYPE) == N_UNDF && address != 0))) + { + /* if we ever want to know a symbols size, get that here */ + fprintf(stderr, "WARNING: ignoring common symbol '%s' in slot %u.\n", name, (unsigned int)i); + dtypes++; + continue; + } + if (!(gettype & symtype)) { + notypes++; + continue; + } + if (name[0] == '.' 
&& name[1] == 'L') { + locals++; + continue; + } + if (symbol_remove_obj(name)) { + ofiles++; + continue; + } + if (section) { + address += sections[0].offset; /* all GNU symbol addresses are TEXT relative */ + if (address > section->end) { + fprintf(stderr, "WARNING: ignoring symbol '%s' of type %c in slot %u with invalid offset 0x%x (>= 0x%x).\n", + name, symbol_char(symtype), (unsigned int)i, address, section->end); + invalid++; + continue; + } + } + sym->address = address; + sym->type = symtype; + sym->name = name; + sym++; + count++; + (void) n_desc; + (void) n_other; + } + list->symbols = slots; + list->namecount = count; + + /* skip verbose output when symbol loading is forced */ + if (ConfigureParams.Debugger.bSymbolsResident) { + return list; + } + + if (invalid) { + fprintf(stderr, "NOTE: ignored %d invalid symbols.\n", invalid); + } + if (dtypes) { + fprintf(stderr, "NOTE: ignored %d debugging symbols.\n", dtypes); + } + if (weak) { + fprintf(stderr, "NOTE: ignored %d weak / undefined symbols.\n", weak); + } + if (notypes) { + fprintf(stderr, "NOTE: ignored %d other unwanted symbol types.\n", notypes); + } + if (locals) { + fprintf(stderr, "NOTE: ignored %d unnamed / local symbols (= name starts with '.L').\n", locals); + } + if (ofiles) { + /* object file path names most likely get truncated and + * as result cause unnecessary symbol name conflicts in + * addition to object file addresses conflicting with + * first symbol in the object file. 
+ */ + fprintf(stderr, "NOTE: ignored %d object symbols (= name has '/', ends in '.[ao]' or is GCC internal).\n", ofiles); } - list->symbols = symbols; - list->count = count; return list; } @@ -307,10 +641,13 @@ static bool symbols_print_prg_info(Uint3 int i; switch (tabletype) { - case 0x4D694E54: /* "MiNT" */ + case SYMBOL_FORMAT_MINT: /* "MiNT" */ info = "GCC/MiNT executable, GST symbol table"; break; - case 0x0: + case SYMBOL_FORMAT_GNU: /* "GNU_" */ + info = "GCC/MiNT executable, a.out symbol table"; + break; + case SYMBOL_FORMAT_DRI: info = "TOS executable, DRI / GST symbol table"; break; default: @@ -319,7 +656,7 @@ static bool symbols_print_prg_info(Uint3 } fprintf(stderr, "%s, reloc=%d, program flags:", info, relocflag); /* bit flags */ - for (i = 0; i < ARRAYSIZE(flags); i++) { + for (i = 0; i < ARRAY_SIZE(flags); i++) { if (prgflags & flags[i].flag) { fprintf(stderr, " %s", flags[i].name); } @@ -347,6 +684,9 @@ static symbol_list_t* symbols_load_binar int offset, reads = 0; Uint16 relocflag; symbol_list_t* symbols; + Uint32 symoff = 0; + Uint32 stroff = 0; + Uint32 strsize = 0; /* get TEXT, DATA & BSS section sizes */ fseek(fp, 2, SEEK_SET); @@ -373,6 +713,76 @@ static symbol_list_t* symbols_load_binar fprintf(stderr, "ERROR: program header reading failed!\n"); return NULL; } + /* + * check for GNU-style symbol table in aexec header + */ + if (tabletype == SYMBOL_FORMAT_MINT) { /* MiNT */ + Uint32 magic1, magic2; + Uint32 dummy; + Uint32 a_text, a_data, a_bss, a_syms, a_entry, a_trsize, a_drsize; + Uint32 g_tparel_pos, g_tparel_size, g_stkpos, g_symbol_format; + + reads = fread(&magic1, sizeof(magic1), 1, fp); + magic1 = SDL_SwapBE32(magic1); + reads += fread(&magic2, sizeof(magic2), 1, fp); + magic2 = SDL_SwapBE32(magic2); + if (reads == 2 && + ((magic1 == 0x283a001a && magic2 == 0x4efb48fa) || /* Original binutils: move.l 28(pc),d4; jmp 0(pc,d4.l) */ + (magic1 == 0x203a001a && magic2 == 0x4efb08fa))) { /* binutils >= 2.18-mint-20080209: move.l 
28(pc),d0; jmp 0(pc,d0.l) */ + reads += fread(&dummy, sizeof(dummy), 1, fp); /* skip a_info */ + reads += fread(&a_text, sizeof(a_text), 1, fp); + a_text = SDL_SwapBE32(a_text); + reads += fread(&a_data, sizeof(a_data), 1, fp); + a_data = SDL_SwapBE32(a_data); + reads += fread(&a_bss, sizeof(a_bss), 1, fp); + a_bss = SDL_SwapBE32(a_bss); + reads += fread(&a_syms, sizeof(a_syms), 1, fp); + a_syms = SDL_SwapBE32(a_syms); + reads += fread(&a_entry, sizeof(a_entry), 1, fp); + a_entry = SDL_SwapBE32(a_entry); + reads += fread(&a_trsize, sizeof(a_trsize), 1, fp); + a_trsize = SDL_SwapBE32(a_trsize); + reads += fread(&a_drsize, sizeof(a_drsize), 1, fp); + a_drsize = SDL_SwapBE32(a_drsize); + reads += fread(&g_tparel_pos, sizeof(g_tparel_pos), 1, fp); + g_tparel_pos = SDL_SwapBE32(g_tparel_pos); + reads += fread(&g_tparel_size, sizeof(g_tparel_size), 1, fp); + g_tparel_size = SDL_SwapBE32(g_tparel_size); + reads += fread(&g_stkpos, sizeof(g_stkpos), 1, fp); + g_stkpos = SDL_SwapBE32(g_stkpos); + reads += fread(&g_symbol_format, sizeof(g_symbol_format), 1, fp); + g_symbol_format = SDL_SwapBE32(g_symbol_format); + if (g_symbol_format == 0) + { + tabletype = SYMBOL_FORMAT_GNU; + } + if ((a_text + (256 - 28)) != textlen) + fprintf(stderr, "warning: inconsistent text segment size %08x != %08x\n", textlen, a_text + (256 - 28)); + if (a_data != datalen) + fprintf(stderr, "warning: inconsistent data segment size %08x != %08x\n", datalen, a_data); + if (a_bss != bsslen) + fprintf(stderr, "warning: inconsistent bss segment size %08x != %08x\n", bsslen, a_bss); + /* + * the symbol table size in the GEMDOS header includes the string table, + * the symbol table size in the exec header does not. 
+ */ + if (tabletype == SYMBOL_FORMAT_GNU) + { + strsize = tablesize - a_syms; + tablesize = a_syms; + stroff = a_syms; + } + + textlen = a_text + (256 - 28); + datalen = a_data; + bsslen = a_bss; + symoff = 0x100 + /* sizeof(extended exec header) */ + a_text + + a_data + + a_trsize + + a_drsize; + } + } if (!symbols_print_prg_info(tabletype, prgflags, relocflag)) { return NULL; } @@ -389,39 +799,44 @@ static symbol_list_t* symbols_load_binar } sections[0].offset = start; sections[0].end = start + textlen; - if (DebugInfo_GetTEXTEnd() != sections[0].end - 1) { + if (DebugInfo_GetTEXTEnd() != sections[0].end) { fprintf(stderr, "ERROR: given program TEXT section size differs from one in RAM!\n"); return NULL; } start = DebugInfo_GetDATA(); sections[1].offset = start; - sections[1].end = start + datalen - 1; + sections[1].end = start + datalen; start = DebugInfo_GetBSS(); sections[2].offset = start; - sections[2].end = start + bsslen - 1; + sections[2].end = start + bsslen; - /* go to start of symbol table */ - offset = 0x1C + textlen + datalen; - if (fseek(fp, offset, SEEK_SET) < 0) { - perror("ERROR: seeking to symbol table failed"); - return NULL; - } - fprintf(stderr, "Trying to load symbol table at offset 0x%x...\n", offset); - symbols = symbols_load_dri(fp, sections, gettype, tablesize); - - if (symbols == INVALID_SYMBOL_OFFSETS && fseek(fp, offset, SEEK_SET) == 0) { - fprintf(stderr, "Re-trying with TEXT-relative BSS/DATA section offsets...\n"); - start = DebugInfo_GetTEXT(); - sections[1].offset = start; - sections[2].offset = start; - sections[1].end += textlen; - sections[2].end += (textlen + datalen); - symbols = symbols_load_dri(fp, sections, gettype, tablesize); + if (sections[0].end != sections[1].offset) { + fprintf(stderr, "WARNIGN: DATA start doesn't match TEXT start + size!\n"); } - if (symbols == INVALID_SYMBOL_OFFSETS) { - return NULL; + if (sections[1].end != sections[2].offset) { + fprintf(stderr, "WARNIGN: BSS start doesn't match DATA start + 
size!\n"); + } + + if (tabletype == SYMBOL_FORMAT_GNU) { + /* go to start of symbol table */ + offset = symoff; + if (fseek(fp, offset, SEEK_SET) < 0) { + perror("ERROR: seeking to symbol table failed"); + return NULL; + } + fprintf(stderr, "Trying to load symbol table at offset 0x%x...\n", offset); + symbols = symbols_load_gnu(fp, sections, gettype, tablesize, stroff, strsize); + } else { + /* go to start of symbol table */ + offset = 0x1C + textlen + datalen; + if (fseek(fp, offset, SEEK_SET) < 0) { + perror("ERROR: seeking to symbol table failed"); + return NULL; + } + fprintf(stderr, "Trying to load symbol table at offset 0x%x...\n", offset); + symbols = symbols_load_dri(fp, sections, gettype, tablesize); } return symbols; } @@ -440,8 +855,10 @@ static symbol_list_t* symbols_load_ascii symtype_t symtype; /* count content lines */ - symbols = 0; + line = symbols = 0; while (fgets(buffer, sizeof(buffer), fp)) { + line++; + /* skip comments (AHCC SYM file comments start with '*') */ if (*buffer == '#' || *buffer == '*') { continue; @@ -451,10 +868,15 @@ static symbol_list_t* symbols_load_ascii if (!*buf) { continue; } + if (!isxdigit(*buf)) { + fprintf(stderr, "ERROR: line %d doesn't start with an address.\n", line); + return NULL; + } symbols++; } if (!symbols) { fprintf(stderr, "ERROR: no symbols.\n"); + return NULL; } fseek(fp, 0, SEEK_SET); @@ -495,6 +917,10 @@ static symbol_list_t* symbols_load_ascii symtype = SYMTYPE_BSS; offset = offsets[2]; break; + case 'A': + symtype = SYMTYPE_ABS; + offset = 0; + break; default: fprintf(stderr, "WARNING: unrecognized symbol type '%c' on line %d, skipping.\n", symchar, line); continue; @@ -514,11 +940,59 @@ static symbol_list_t* symbols_load_ascii count++; } list->symbols = symbols; - list->count = count; + list->namecount = count; return list; } /** + * Remove full duplicates from the sorted names list + * and trim the allocation to remaining symbols + */ +static void symbols_trim_names(symbol_list_t* list) +{ + 
symbol_t *sym = list->names; + int i, next, count, dups; + + count = list->namecount; + for (dups = i = 0; i < count - 1; i++) { + next = i + 1; + if (strcmp(sym[i].name, sym[next].name) == 0 && + sym[i].address == sym[next].address && + sym[i].type == sym[next].type) { + /* remove duplicate */ + memmove(sym+i, sym+next, (count-next) * sizeof(symbol_t)); + count--; + dups++; + } + } + if (dups || list->namecount < list->symbols) { + list->names = realloc(list->names, i * sizeof(symbol_t)); + assert(list->names); + list->namecount = i; + } + if (dups) { + fprintf(stderr, "WARNING: removed %d complete symbol duplicates\n", dups); + } +} + +/** + * Separate TEXT symbols from other symbols in address list. + */ +static void symbols_trim_addresses(symbol_list_t* list) +{ + symbol_t *sym = list->addresses; + int i; + + for (i = 0; i < list->namecount; i++) { + if (sym[i].type != SYMTYPE_TEXT) { + break; + } + } + list->codecount = i; + list->datacount = list->namecount - i; +} + +/** * Load symbols of given type and the symbol address addresses from * the given file and add given offsets to the addresses. * Return symbols list or NULL for failure. 
@@ -557,27 +1031,42 @@ static symbol_list_t* Symbols_Load(const return NULL; } - if (list->count < list->symbols) { - if (!list->count) { - fprintf(stderr, "ERROR: no valid symbols in '%s', loading failed!\n", filename); - symbol_list_free(list); - return NULL; - } - /* parsed less than there were "content" lines */ - list->names = realloc(list->names, list->count * sizeof(symbol_t)); - assert(list->names); + if (!list->namecount) { + fprintf(stderr, "ERROR: no valid symbols in '%s', loading failed!\n", filename); + symbol_list_free(list); + return NULL; } + /* sort and trim names list */ + qsort(list->names, list->namecount, sizeof(symbol_t), symbols_by_name); + symbols_trim_names(list); + /* copy name list to address list */ - list->addresses = malloc(list->count * sizeof(symbol_t)); + list->addresses = malloc(list->namecount * sizeof(symbol_t)); assert(list->addresses); - memcpy(list->addresses, list->names, list->count * sizeof(symbol_t)); + memcpy(list->addresses, list->names, list->namecount * sizeof(symbol_t)); - /* sort both lists, with different criteria */ - qsort(list->addresses, list->count, sizeof(symbol_t), symbols_by_address); - qsort(list->names, list->count, sizeof(symbol_t), symbols_by_name); + /* sort address list and trim to contain just TEXT symbols */ + qsort(list->addresses, list->namecount, sizeof(symbol_t), symbols_by_address); + symbols_trim_addresses(list); - fprintf(stderr, "Loaded %d symbols from '%s'.\n", list->count, filename); + /* skip verbose output when symbol loading is forced */ + if (!ConfigureParams.Debugger.bSymbolsResident) { + /* check for duplicate names */ + if (symbols_check_names(list->names, list->namecount)) { + fprintf(stderr, "-> Hatari symbol expansion can match only one of the addresses for name duplicates!\n"); + } + /* check for duplicate TEXT & other addresses */ + if (symbols_check_addresses(list->addresses, list->codecount)) { + fprintf(stderr, "-> Hatari profile/dissassembly will show only one of the TEXT 
symbols for given address!\n"); + } + if (symbols_check_addresses(list->addresses + list->codecount, list->datacount)) { + fprintf(stderr, "-> Hatari dissassembly will show only one of the symbols for given address!\n"); + } + } + + fprintf(stderr, "Loaded %d symbols (%d TEXT) from '%s'.\n", + list->namecount, list->codecount, filename); return list; } @@ -592,17 +1081,24 @@ static void Symbols_Free(symbol_list_t* if (!list) { return; } - assert(list->count); - for (i = 0; i < list->count; i++) { - free(list->names[i].name); + assert(list->namecount); + if (list->strtab) { + free(list->strtab); + list->strtab = NULL; + } else { + for (i = 0; i < list->namecount; i++) { + free(list->names[i].name); + } } free(list->addresses); free(list->names); /* catch use of freed list */ list->addresses = NULL; + list->codecount = 0; + list->datacount = 0; list->names = NULL; - list->count = 0; + list->namecount = 0; free(list); } @@ -631,7 +1127,7 @@ static char* Symbols_MatchByName(symbol_ /* next match */ entry = list->names; - while (i < list->count) { + while (i < list->namecount) { if ((entry[i].type & symtype) && strncmp(entry[i].name, text, len) == 0) { return strdup(entry[i++].name); @@ -653,11 +1149,19 @@ char* Symbols_MatchCpuAddress(const char } char* Symbols_MatchCpuCodeAddress(const char *text, int state) { - return Symbols_MatchByName(CpuSymbolsList, SYMTYPE_TEXT, text, state); + if (ConfigureParams.Debugger.bMatchAllSymbols) { + return Symbols_MatchByName(CpuSymbolsList, SYMTYPE_ALL, text, state); + } else { + return Symbols_MatchByName(CpuSymbolsList, SYMTYPE_TEXT, text, state); + } } char* Symbols_MatchCpuDataAddress(const char *text, int state) { - return Symbols_MatchByName(CpuSymbolsList, SYMTYPE_DATA|SYMTYPE_BSS, text, state); + if (ConfigureParams.Debugger.bMatchAllSymbols) { + return Symbols_MatchByName(CpuSymbolsList, SYMTYPE_ALL, text, state); + } else { + return Symbols_MatchByName(CpuSymbolsList, SYMTYPE_DATA|SYMTYPE_BSS, text, state); + } } /** @@ 
-682,23 +1186,17 @@ char* Symbols_MatchDspDataAddress(const /* ---------------- symbol name -> address search ------------------ */ /** - * Search symbol of given type by name. + * Binary search symbol of given type by name. * Return symbol if name matches, zero otherwise. */ -static const symbol_t* Symbols_SearchByName(symbol_list_t* list, symtype_t symtype, const char *name) +static const symbol_t* Symbols_SearchByName(symbol_t* entries, int count, symtype_t symtype, const char *name) { - symbol_t *entries; /* left, right, middle */ int l, r, m, dir; - if (!list) { - return NULL; - } - entries = list->names; - /* bisect */ l = 0; - r = list->count - 1; + r = count - 1; do { m = (l+r) >> 1; dir = strcmp(entries[m].name, name); @@ -715,55 +1213,47 @@ static const symbol_t* Symbols_SearchByN } /** - * Set given CPU symbol's address to variable and return TRUE if one was found. + * Set given symbol's address to variable and return true if one + * was found from given list. */ -bool Symbols_GetCpuAddress(symtype_t symtype, const char *name, Uint32 *addr) +static bool Symbols_GetAddress(symbol_list_t* list, symtype_t symtype, const char *name, Uint32 *addr) { const symbol_t *entry; - entry = Symbols_SearchByName(CpuSymbolsList, symtype, name); + if (!(list && list->names)) { + return false; + } + entry = Symbols_SearchByName(list->names, list->namecount, symtype, name); if (entry) { *addr = entry->address; return true; } return false; } - -/** - * Set given DSP symbol's address to variable and return TRUE if one was found. 
- */ +bool Symbols_GetCpuAddress(symtype_t symtype, const char *name, Uint32 *addr) +{ + return Symbols_GetAddress(CpuSymbolsList, symtype, name, addr); +} bool Symbols_GetDspAddress(symtype_t symtype, const char *name, Uint32 *addr) { - const symbol_t *entry; - entry = Symbols_SearchByName(DspSymbolsList, symtype, name); - if (entry) { - *addr = entry->address; - return true; - } - return false; + return Symbols_GetAddress(DspSymbolsList, symtype, name, addr); } /* ---------------- symbol address -> name search ------------------ */ /** - * Search symbol by address. + * Binary search symbol by address in given sorted list. * Return symbol index if address matches, -1 otherwise. */ -static int Symbols_SearchByAddress(symbol_list_t* list, Uint32 addr) +static int Symbols_SearchByAddress(symbol_t* entries, int count, Uint32 addr) { - symbol_t *entries; /* left, right, middle */ int l, r, m; Uint32 curr; - if (!list) { - return -1; - } - entries = list->addresses; - /* bisect */ l = 0; - r = list->count - 1; + r = count - 1; do { m = (l+r) >> 1; curr = entries[m].address; @@ -780,60 +1270,69 @@ static int Symbols_SearchByAddress(symbo } /** - * Search CPU symbol by address. - * Return symbol name if address matches, NULL otherwise. + * Search symbol in given list by type & address. + * Return symbol name if there's a match, NULL otherwise. + * TEXT symbols will be matched before other symbol types. * Returned name is valid only until next Symbols_* function call. 
*/ -const char* Symbols_GetByCpuAddress(Uint32 addr) +static const char* Symbols_GetByAddress(symbol_list_t* list, Uint32 addr, symtype_t type) { - int idx = Symbols_SearchByAddress(CpuSymbolsList, addr); - if (idx < 0) { + if (!(list && list->addresses)) { return NULL; } - return CpuSymbolsList->addresses[idx].name; + if ((type & SYMTYPE_TEXT) && list->codecount > 0) { /* bisect reads entries[] before checking count, skip empty range */ + int i = Symbols_SearchByAddress(list->addresses, list->codecount, addr); + if (i >= 0) { + return list->addresses[i].name; + } + } + if ((type & ~SYMTYPE_TEXT) && list->datacount > 0) { /* same guard for the non-TEXT range */ + int i = Symbols_SearchByAddress(list->addresses + list->codecount, list->datacount, addr); + if (i >= 0) { + return list->addresses[list->codecount + i].name; + } + } + return NULL; } -/** - * Search DSP symbol by address. - * Return symbol name if address matches, NULL otherwise. - * Returned name is valid only until next Symbols_* function call. - */ -const char* Symbols_GetByDspAddress(Uint32 addr) +const char* Symbols_GetByCpuAddress(Uint32 addr, symtype_t type) { - int idx = Symbols_SearchByAddress(DspSymbolsList, addr); - if (idx < 0) { - return NULL; - } - return DspSymbolsList->addresses[idx].name; + return Symbols_GetByAddress(CpuSymbolsList, addr, type); } - -/** - * Search CPU symbol by address. - * Return symbol index if address matches, -1 otherwise. - */ -int Symbols_GetCpuAddressIndex(Uint32 addr) +const char* Symbols_GetByDspAddress(Uint32 addr, symtype_t type) { - return Symbols_SearchByAddress(CpuSymbolsList, addr); + return Symbols_GetByAddress(DspSymbolsList, addr, type); } /** - * Search DSP symbol by address. + * Search given list for TEXT symbol by address. * Return symbol index if address matches, -1 otherwise. 
*/ -int Symbols_GetDspAddressIndex(Uint32 addr) +static int Symbols_GetCodeIndex(symbol_list_t* list, Uint32 addr) +{ + if (!list) { + return -1; + } + return Symbols_SearchByAddress(list->addresses, list->codecount, addr); +} +int Symbols_GetCpuCodeIndex(Uint32 addr) +{ + return Symbols_GetCodeIndex(CpuSymbolsList, addr); +} +int Symbols_GetDspCodeIndex(Uint32 addr) { - return Symbols_SearchByAddress(DspSymbolsList, addr); + return Symbols_GetCodeIndex(DspSymbolsList, addr); } /** - * Return how many symbols are loaded/available + * Return how many TEXT symbols are loaded/available */ -int Symbols_CpuCount(void) +int Symbols_CpuCodeCount(void) { - return (CpuSymbolsList ? CpuSymbolsList->count : 0); + return (CpuSymbolsList ? CpuSymbolsList->codecount : 0); } -int Symbols_DspCount(void) +int Symbols_DspCodeCount(void) { - return (DspSymbolsList ? DspSymbolsList->count : 0); + return (DspSymbolsList ? DspSymbolsList->codecount : 0); } /* ---------------- symbol showing ------------------ */ @@ -841,52 +1340,75 @@ int Symbols_DspCount(void) /** * Show symbols from given list with paging. */ -static void Symbols_Show(symbol_list_t* list, const char *sorttype) +static void Symbols_Show(symbol_list_t* list, const char *sortcmd) { symbol_t *entry, *entries; + const char *symtype, *sorttype; + int i, rows, count; char symchar; - int i; + char line[80]; if (!list) { fprintf(stderr, "No symbols!\n"); return; } - if (strcmp("addr", sorttype) == 0) { + if (strcmp("code", sortcmd) == 0) { + sorttype = "address"; entries = list->addresses; + count = list->codecount; + symtype = " TEXT"; + } else if (strcmp("data", sortcmd) == 0) { + sorttype = "address"; + entries = list->addresses + list->codecount; + count = list->datacount; + symtype = " DATA/BSS/ABS"; } else { + sorttype = "name"; entries = list->names; + count = list->namecount; + symtype = ""; } - fprintf(stderr, "%s symbols sorted by %s:\n", - (list == CpuSymbolsList ? 
"CPU" : "DSP"), sorttype); + rows = DebugUI_GetPageLines(ConfigureParams.Debugger.nSymbolLines, 20); - for (entry = entries, i = 0; i < list->count; i++, entry++) { + for (entry = entries, i = 0; i < count; i++, entry++) { symchar = symbol_char(entry->type); fprintf(stderr, "0x%08x %c %s\n", entry->address, symchar, entry->name); - if (i && i % 20 == 0) { + if ((i + 1) % rows == 0) { fprintf(stderr, "--- q to exit listing, just enter to continue --- "); - if (toupper(getchar()) == 'Q') { - return; + if (fgets(line, sizeof(line), stdin) == NULL || + toupper(line[0]) == 'Q') { + break; } } } + fprintf(stderr, "%d %s%s symbols (of %d) sorted by %s.\n", i, + (list == CpuSymbolsList ? "CPU" : "DSP"), + symtype, count, sorttype); } /* ---------------- binary load handling ------------------ */ - /** - * Remove last opened program path. + * If symbols are set resident, load them if they aren't yet loaded, + * otherwise remove them along with program path. + * + * Called on GEMDOS reset and when program terminates + * (unless terminated with Ptermres()). */ void Symbols_RemoveCurrentProgram(void) { if (CurrentProgramPath) { + if (ConfigureParams.Debugger.bSymbolsResident) { + Symbols_LoadCurrentProgram(); + } free(CurrentProgramPath); CurrentProgramPath = NULL; - if (SymbolsAreForProgram) { + if (CpuSymbolsList && SymbolsAreForProgram && !ConfigureParams.Debugger.bSymbolsResident) { Symbols_Free(CpuSymbolsList); + fprintf(stderr, "Program exit, removing its symbols.\n"); CpuSymbolsList = NULL; } } @@ -894,18 +1416,33 @@ void Symbols_RemoveCurrentProgram(void) } /** - * Set last opened program path. + * Set last opened program path and remove symbols if they + * didn't get remove beforehand. + * + * Called on first Fopen() after Pexec(). 
*/ void Symbols_ChangeCurrentProgram(const char *path) { if (Opt_IsAtariProgram(path)) { - Symbols_RemoveCurrentProgram(); + if (ConfigureParams.Debugger.bSymbolsResident) { + if (CpuSymbolsList && SymbolsAreForProgram) { + Symbols_Free(CpuSymbolsList); + fprintf(stderr, "Program launch, removing previous program symbols.\n"); + CpuSymbolsList = NULL; + } + if (CurrentProgramPath) { + free(CurrentProgramPath); + } + } else { + Symbols_RemoveCurrentProgram(); + } CurrentProgramPath = strdup(path); } } /** * Load symbols for last opened program. + * Called when debugger is invoked. */ void Symbols_LoadCurrentProgram(void) { @@ -931,34 +1468,54 @@ void Symbols_LoadCurrentProgram(void) char *Symbols_MatchCommand(const char *text, int state) { static const char* subs[] = { - "addr", "free", "name", "prg" + "code", "data", "free", "match", "name", "prg", "resident" }; - return DebugUI_MatchHelper(subs, ARRAYSIZE(subs), text, state); + return DebugUI_MatchHelper(subs, ARRAY_SIZE(subs), text, state); } const char Symbols_Description[] = - " [ [ ]]\n" - "\tLoads symbol names and their addresses from the given file.\n" - "\tIf there were previously loaded symbols, they're replaced.\n" + "<|code|data|name> -- list symbols\n" + "\tsymbols -- load/free symbols\n" + "\tsymbols [ [ ]]\n" + "\tsymbols -- toggle symbol options\n" "\n" - "\tGiving 'prg' instead of a file name, loads DRI/GST symbol table\n" - "\tfrom the last program executed through the GEMDOS HD emulation.\n" + "\t'name' command lists the currently loaded symbols, sorted by name.\n" + "\t'code' and 'data' commands list them sorted by address; 'code' lists\n" + "\tonly TEXT symbols, 'data' lists DATA/BSS/ABS symbols.\n" "\n" - "\tGiving either 'name' or 'addr' instead of a file name, will\n" - "\tlist the currently loaded symbols. 
Giving 'free' will remove\n" - "\tthe loaded symbols.\n" + "\tBy default, symbols are loaded from the currently executing program's\n" + "\tbinary when entering the debugger, IF program is started through\n" + "\tGEMDOS HD, and they're freed when that program terminates.\n" "\n" - "\tIf one base address/offset is given, its added to all addresses.\n" - "\tIf three offsets are given (and non-zero), they're applied to\n" - "\ttext (T), data (D) and BSS (B) symbols. Given offsets are used\n" - "\tonly when loading ASCII symbol files."; + "\tThat corresponds to 'prg' command which loads (DRI/GST or a.out\n" + "\tformat) symbol table from the last program executed through\n" + "\tthe GEMDOS HD emulation.\n" + "\n" + "\t'free' command removes the loaded symbols.\n" + "\n" + "\tIf program lacks symbols, or it's not run through the GEMDOS HD\n" + "\temulation, user can ask symbols to be loaded from a file that's\n" + "\tan unstripped version of the binary. Or from an ASCII symbols file\n" + "\tproduced by the 'nm' and (Hatari) 'gst2ascii' tools.\n" + "\n" + "\tWith ASCII symbols files, given non-zero offset(s) are added to\n" + "\tthe text (T), data (D) and BSS (B) symbols. 
Typically one uses\n" + "\tTEXT variable, sometimes also DATA & BSS, variables for this.\n" + "\n" + "\t'resident' command toggles whether debugger will load symbols\n" + "\tbefore program terminates (if user hasn't entered debugger before\n" + "\tthis), and defers symbol freeing until another program is started.\n" + "\n" + "\t'match' command toggles whether TAB completion matches all symbols,\n" + "\tor only symbol types that should be relevant for given command."; + /** * Handle debugger 'symbols' command and its arguments */ int Symbols_Command(int nArgc, char *psArgs[]) { - enum { TYPE_NONE, TYPE_CPU, TYPE_DSP } listtype; + enum { TYPE_CPU, TYPE_DSP } listtype; Uint32 offsets[3], maxaddr; symbol_list_t *list; const char *file; @@ -967,23 +1524,52 @@ int Symbols_Command(int nArgc, char *psA if (strcmp("dspsymbols", psArgs[0]) == 0) { listtype = TYPE_DSP; maxaddr = 0xFFFF; - } else if (strcmp("symbols", psArgs[0]) == 0) { + } else { listtype = TYPE_CPU; if ( ConfigureParams.System.bAddressSpace24 ) maxaddr = 0x00FFFFFF; else maxaddr = 0xFFFFFFFF; + } + if (nArgc < 2) { + file = "name"; } else { - listtype = TYPE_NONE; - maxaddr = 0; + file = psArgs[1]; } - if (nArgc < 2 || listtype == TYPE_NONE) { - return DebugUI_PrintCmdHelp(psArgs[0]); + + /* toggle whether to autoload symbols on program start, + * and keep them until next program start (=resident), + * OR only loading them when entering the debugger and + * freeing them when program terminates. 
+ */ + if (strcmp(file, "resident") == 0) { + ConfigureParams.Debugger.bSymbolsResident = !ConfigureParams.Debugger.bSymbolsResident; + if (ConfigureParams.Debugger.bSymbolsResident) { + Symbols_LoadCurrentProgram(); + fprintf(stderr, "Program symbols will always be loaded (with reduced warnings)\nand kept resident until next program start.\n"); + } else { + fprintf(stderr, "Program symbols will be removed when program terminates.\n"); + if (!CurrentProgramPath) { + /* make sure normal autoloading isn't prevented */ + Symbols_Free(CpuSymbolsList); + CpuSymbolsList = NULL; + } + } + return DEBUGGER_CMDDONE; + } + /* toggling whether all or only specific symbols types get TAB completed */ + if (strcmp(file, "match") == 0) { + ConfigureParams.Debugger.bMatchAllSymbols = !ConfigureParams.Debugger.bMatchAllSymbols; + if (ConfigureParams.Debugger.bMatchAllSymbols) { + fprintf(stderr, "Matching all symbols types.\n"); + } else { + fprintf(stderr, "Matching only symbols (most) relevant for given command.\n"); + } + return DEBUGGER_CMDDONE; } - file = psArgs[1]; /* handle special cases */ - if (strcmp(file, "name") == 0 || strcmp(file, "addr") == 0) { + if (strcmp(file, "name") == 0 || strcmp(file, "code") == 0 || strcmp(file, "data") == 0) { list = (listtype == TYPE_DSP ? DspSymbolsList : CpuSymbolsList); Symbols_Show(list, file); return DEBUGGER_CMDDONE; @@ -1001,7 +1587,7 @@ int Symbols_Command(int nArgc, char *psA /* get offsets */ offsets[0] = 0; - for (i = 0; i < ARRAYSIZE(offsets); i++) { + for (i = 0; i < ARRAY_SIZE(offsets); i++) { if (i+2 < nArgc) { int dummy; Eval_Expression(psArgs[i+2], &(offsets[i]), &dummy, listtype==TYPE_DSP);