Annotation of XNU/osfmk/profiling/i386/profile-asm.s, revision 1.1.1.1

1.1       root        1: /*
                      2:  * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
                      3:  *
                      4:  * @APPLE_LICENSE_HEADER_START@
                      5:  * 
                      6:  * The contents of this file constitute Original Code as defined in and
                      7:  * are subject to the Apple Public Source License Version 1.1 (the
                      8:  * "License").  You may not use this file except in compliance with the
                      9:  * License.  Please obtain a copy of the License at
                     10:  * http://www.apple.com/publicsource and read it before using this file.
                     11:  * 
                     12:  * This Original Code and all software distributed under the License are
                     13:  * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
                     14:  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
                     15:  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
                     16:  * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
                     17:  * License for the specific language governing rights and limitations
                     18:  * under the License.
                     19:  * 
                     20:  * @APPLE_LICENSE_HEADER_END@
                     21:  */
                     22: /*
                     23:  * @OSF_COPYRIGHT@
                     24:  */
                     25: /*
                     26:  * HISTORY
                     27:  * 
                     28:  * Revision 1.1.1.1  1998/09/22 21:05:49  wsanchez
                     29:  * Import of Mac OS X kernel (~semeria)
                     30:  *
                     31:  * Revision 1.1.1.1  1998/03/07 02:26:08  wsanchez
                     32:  * Import of OSF Mach kernel (~mburg)
                     33:  *
                     34:  * Revision 1.1.7.1  1997/09/22  17:41:24  barbou
                     35:  *     MP+RT: protect cpu_number() usage against preemption.
                     36:  *     [97/09/16            barbou]
                     37:  *
                     38:  * Revision 1.1.5.1  1995/01/06  19:53:37  devrcs
                     39:  *     mk6 CR668 - 1.3b26 merge
                     40:  *     new file for mk6
                     41:  *     [1994/10/12  22:25:20  dwm]
                     42:  * 
                     43:  * Revision 1.1.2.2  1994/05/16  19:19:17  meissner
                     44:  *     Add support for converting 64-bit integers to a decimal string.
                     45:  *     Use the correct address (selfpc) when creating the prof header for gprof.
                     46:  *     [1994/04/28  21:44:59  meissner]
                     47:  * 
                     48:  * Revision 1.1.2.1  1994/04/08  17:51:42  meissner
                     49:  *     Make most stats 64 bits, except for things like memory allocation.
                     50:  *     [1994/04/02  14:58:21  meissner]
                     51:  * 
                     52:  *     Do not provide old mcount support under MK or server.
                     53:  *     Fixup stats size so it is the same as in profile-md.h.
                     54:  *     [1994/03/29  21:00:03  meissner]
                     55:  * 
                     56:  *     Use faster sequence for overflow addition.
                     57:  *     Keep {dummy,prof,gprof,old}_mcount counts in double precision.
                     58:  *     Add kernel NCPUS > 1 support.
                     59:  *     [1994/03/17  20:13:23  meissner]
                     60:  * 
                     61:  *     Add gprof/prof overflow support
                     62:  *     [1994/03/17  14:56:44  meissner]
                     63:  * 
                     64:  *     Add size of histogram counters & unused fields to profile_profil struct
                     65:  *     [1994/02/17  21:41:44  meissner]
                     66:  * 
                     67:  *     Add too_low/too_high to profile_stats.
                     68:  *     [1994/02/16  22:38:11  meissner]
                     69:  * 
                     70:  *     Bump # allocation contexts to 32 from 16.
                     71:  *     Store unique ptr address in gprof function header structure for _profile_reset.
                     72:  *     Add new fields from profile-{internal,md}.h.
                     73:  *     Align loop looking for an unlocked acontext.
                     74:  *     Count # times a locked context block was found.
                     75:  *     Expand copyright.
                     76:  *     [1994/02/07  12:40:56  meissner]
                     77:  * 
                     78:  *     Keep track of the number of times the kernel overflows the HISTCOUNTER counter.
                     79:  *     [1994/02/03  20:13:23  meissner]
                     80:  * 
                     81:  *     Add stats for {user,kernel,idle} mode in the kernel.
                     82:  *     [1994/02/03  15:17:22  meissner]
                     83:  * 
                     84:  *     No change.
                     85:  *     [1994/02/03  00:58:49  meissner]
                     86:  * 
                     87:  *     Combine _profile_{vars,stats,md}; Allow more than one _profile_vars.
                     88:  *     [1994/02/01  12:03:56  meissner]
                     89:  * 
                     90:  *     Move _mcount_ptr to be closer to other data declarations.
                     91:  *     Add text_len to profile_profil structure for mk.
                     92:  *     Split records_cnt into prof_cnt/gprof_cnt.
                     93:  *     Always update prof_cnt/gprof_cnt even if not DO_STATS.
                     94:  *     Add current/max cpu indicator to stats for kernel.
                     95:  *     [1994/01/28  23:33:20  meissner]
                     96:  * 
                     97:  *     Don't do 4+Lgotoff(lab), use separate labels.
                     98:  *     Change GPROF_HASH_SHIFT to 9 (from 8).
                     99:  *     [1994/01/26  22:00:59  meissner]
                    100:  * 
                    101:  *     Fixup NO_RECURSIVE_ALLOC to do byte loads, not word loads.
                    102:  *     [1994/01/26  20:30:57  meissner]
                    103:  * 
                    104:  *     Move callback pointers into separate allocation context.
                    105:  *     Add size fields for other structures to profile-vars.
                    106:  *     Allocate string table as one large allocation.
                    107:  *     Rewrite old mcount code once again.
                    108:  *     Use multiply to make hash value, not divide.
                    109:  *     Hash table is now a power of two.
                    110:  *     [1994/01/26  20:23:32  meissner]
                    111:  * 
                    112:  *     Cut hash table size back to 16189.
                    113:  *     Add size fields to all structures.
                    114:  *     Add major/minor version number to _profile_md.
                    115:  *     Move allocation context block pointers to _profile_vars.
                    116:  *     Move _gprof_dummy after _profile_md.
                    117:  *     New function header code now falls into hash an element
                    118:  *     to avoid having the hash code duplicated or use a macro.
                    119:  *     Fix bug in _gprof_mcount with ELF shared libraries.
                    120:  *     [1994/01/25  01:45:59  meissner]
                    121:  * 
                    122:  *     Move init functions to C code; rearrange profil varaibles.
                    123:  *     [1994/01/22  01:11:14  meissner]
                    124:  * 
                    125:  *     No change.
                    126:  *     [1994/01/20  20:56:43  meissner]
                    127:  * 
                    128:  *     Fixup copyright.
                    129:  *     [1994/01/18  23:07:39  meissner]
                    130:  * 
                    131:  *     Make flags byte-sized.
                    132:  *     Add have_bb flag.
                    133:  *     Add init_format flag.
                    134:  *     Always put word size multipler first in .space.
                    135:  *     [1994/01/18  21:57:14  meissner]
                    136:  * 
                    137:  *     Fix elfpic problems in last change.
                    138:  *     [1994/01/16  14:04:26  meissner]
                    139:  * 
                    140:  *     Rewrite gprof caching to be faster & not need a lock.
                    141:  *     Record prof information for gprof too.
                    142:  *     Bump reserved stats to 64.
                    143:  *     Bump up hash table size 30799.
                    144:  *     Conditionally use lock prefix.
                    145:  *     Change most #ifdef's to #if.
                    146:  *     DEBUG_PROFILE turns on stack frames now.
                    147:  *     Conditionally add externs to gprof to determine where time is spent.
                    148:  *     Prof_mcount uses xchgl to update function pointer.
                    149:  *     [1994/01/15  18:40:33  meissner]
                    150:  * 
                    151:  *     Fix a comment.
                    152:  *     Separate statistics from debugging (though debugging turns it on).
                    153:  *     Remove debug code that traces each gprof request.
                    154:  *     [1994/01/15  00:59:02  meissner]
                    155:  * 
                    156:  *     Move max hash bucket calculation into _gprof_write & put info in stats structure.
                    157:  *     [1994/01/04  16:15:14  meissner]
                    158:  * 
                    159:  *     Use _profile_printf to write diagnostics; add diag_stream to hold stream to write to.
                    160:  *     [1994/01/04  15:37:44  meissner]
                    161:  * 
                    162:  *     Add more allocation memory pools (gprof function hdrs in particular).
                    163:  *     For prof, gprof arc, and gprof function hdrs, allocate 16 pages at a time.
                    164:  *     Add major/minor version numbers to _profile_{vars,stats}.
                    165:  *     Add # profil buckets field to _profil_stats.
                    166:  *     [19
                    167:  * 
                    168:  * $EndLog$
                    169:  */
                    170: 
                    171: /*
                    172:  * Common 386 profiling module that is shared between the kernel, mach
                    173:  * servers, and the user space library.  Each environment includes
                    174:  * this file.
                    175:  */
                    176: 
                    177:        .file   "profile-asm.s"
                    178: 
                    179: #include <cpus.h>
                    180: 
                    181: #include <machine/asm.h>
                    182: 
                    183: /*
                    184:  * By default, debugging turns on statistics and stack frames.
                    185:  */
                    186: 
                    187: #if DEBUG_PROFILE
                    188: #ifndef DO_STATS
                    189: #define DO_STATS 1
                    190: #endif
                    191: 
                    192: #ifndef STACK_FRAMES
                    193: #define STACK_FRAMES 1
                    194: #endif
                    195: #endif
                    196: 
                    197: #ifndef OLD_MCOUNT
                    198: #define OLD_MCOUNT 0                   /* do not compile old code for mcount */
                    199: #endif
                    200: 
                    201: #ifndef DO_STATS
                    202: #define DO_STATS 1                     /* compile in statistics code */
                    203: #endif
                    204: 
                    205: #ifndef DO_LOCK
                    206: #define        DO_LOCK 0                       /* use lock; in front of increments */
                    207: #endif
                    208: 
                    209: #ifndef LOCK_STATS
                    210: #define LOCK_STATS DO_LOCK             /* update stats with lock set */
                    211: #endif
                    212: 
                    213: #ifndef STACK_FRAMES
                    214: #define STACK_FRAMES 0                 /* create stack frames for debugger */
                    215: #endif
                    216: 
                    217: #ifndef NO_RECURSIVE_ALLOC
                    218: #define NO_RECURSIVE_ALLOC 0           /* check for recursive allocs */
                    219:                                        /* (not thread safe!) */
                    220: #endif
                    221: 
                    222: #ifndef MARK_GPROF
                    223: #define MARK_GPROF 0                   /* add externs for gprof profiling */
                    224: #endif
                    225: 
                    226: #ifndef OVERFLOW
                    227: #define        OVERFLOW 1                      /* add overflow checking support */
                    228: #endif
                    229: 
                    230: /*
                    231:  * Turn on the use of the lock prefix if desired.
                    232:  */
                    233: 
                    234: #ifndef LOCK
                    235: #if DO_LOCK
                    236: #define LOCK lock;
                    237: #else
                    238: #define LOCK
                    239: #endif
                    240: #endif
                    241: 
                    242: #ifndef SLOCK
                    243: #if LOCK_STATS
                    244: #define SLOCK LOCK
                    245: #else
                    246: #define SLOCK
                    247: #endif
                    248: #endif
                    249: 
                    250: /*
                    251:  * Double or single precision incrementing
                    252:  */
                    253: 
                    254: #if OVERFLOW
                    255: #define DINC(mem)              LOCK addl $1,mem; LOCK adcl $0,4+mem
                    256: #define DINC2(mem,mem2)                LOCK addl $1,mem; LOCK adcl $0,mem2
                    257: #define SDINC(mem)             SLOCK addl $1,mem; SLOCK adcl $0,4+mem
                    258: #define SDADD(val,mem)         SLOCK addl val,mem; SLOCK adcl $0,4+mem
                    259: #define SDADDNEG(val,mem)      SLOCK subl val,mem; SLOCK adcl $0,4+mem
                    260: #define SDSUB(val,mem)         SLOCK subl val,mem; SLOCK sbbl $0,4+mem
                    261: 
                    262: #else
                    263: #define DINC(mem)              LOCK incl mem
                    264: #define DINC2(mem,mem2)                LOCK incl mem
                    265: #define SDINC(mem)             SLOCK incl mem
                    266: #define        SDADD(val,mem)          SLOCK addl val,mem
                    267: #define        SDADDNEG(val,mem)       SLOCK subl val,mem
                    268: #define        SDSUB(val,mem)          SLOCK subl val,mem
                    269: #endif
                    270: 
                    271: /*
                    272:  * Stack frame support so that debugger traceback works.
                    273:  */
                    274: 
                    275: #if STACK_FRAMES
                    276: #define        ENTER   pushl %ebp; movl %esp,%ebp
                    277: #define        LEAVE0  popl %ebp
                    278: #define        Estack  4
                    279: #else
                    280: #define        ENTER
                    281: #define        LEAVE0
                    282: #define        Estack  0
                    283: #endif
                    284: 
                    285: /*
                    286:  * Gprof profiling.
                    287:  */
                    288: 
                    289: #if MARK_GPROF
                    290: #define MARK(name) .globl EXT(name); ELF_FUNC(EXT(name)); ELF_SIZE(EXT(name),0); LEXT(name)
                    291: #else
                    292: #define MARK(name)
                    293: #endif
                    294: 
                    295: /*
                    296:  * Profiling allocation context block.  Each time memory is needed, the
                    297:  * allocator loops until it finds an unlocked context block, and allocates
                    298:  * from that block.  If no context blocks are available, a new memory
                    299:  * pool is allocated, and added to the end of the chain.
                    300:  */
                    301: 
                    302: LCL(A_next)            = 0                     /* next context block link (must be 0) */
                    303: LCL(A_plist)           = LCL(A_next)+4         /* head of page list for context block */
                    304: LCL(A_lock)            = LCL(A_plist)+4        /* lock word */
                    305: LCL(A_size)            = LCL(A_lock)+4         /* size of context block */
                    306: 
                    307: #define        A_next          LCL(A_next)
                    308: #define        A_plist         LCL(A_plist)
                    309: #define        A_lock          LCL(A_lock)
                    310: #define        A_size          LCL(A_size)
                    311: 
                    312: /*
                    313:  * Allocation contexts used.
                    314:  */
                    315: 
                    316: LCL(C_prof)            = 0                     /* prof records */
                    317: LCL(C_gprof)           = 1                     /* gprof arc records */
                    318: LCL(C_gfunc)           = 2                     /* gprof function headers */
                    319: LCL(C_misc)            = 3                     /* misc. allocations */
                    320: LCL(C_profil)          = 4                     /* memory for profil */
                    321: LCL(C_dci)             = 5                     /* memory for dci */
                    322: LCL(C_bb)              = 6                     /* memory for basic blocks */
                    323: LCL(C_callback)                = 7                     /* memory for callbacks */
                    324: LCL(C_max)             = 32                    /* # allocation contexts */
                    325: 
                    326: #define        C_prof          LCL(C_prof)
                    327: #define        C_gprof         LCL(C_gprof)
                    328: #define        C_gfunc         LCL(C_gfunc)
                    329: #define        C_max           LCL(C_max)
                    330: 
                    331: /*
                    332:  * Linked list of memory allocations.
                    333:  */
                    334: 
                    335: LCL(M_first)           = 0                     /* pointer to first byte available */
                    336: LCL(M_ptr)             = LCL(M_first)+4        /* pointer to next available byte */
                    337: LCL(M_next)            = LCL(M_ptr)+4          /* next page allocated */
                    338: LCL(M_nfree)           = LCL(M_next)+4         /* # bytes available */
                    339: LCL(M_nalloc)          = LCL(M_nfree)+4        /* # bytes allocated */
                    340: LCL(M_num)             = LCL(M_nalloc)+4       /* # allocations done on this page */
                    341: LCL(M_size)            = LCL(M_num)+4          /* size of page header */
                    342: 
                    343: #define        M_first         LCL(M_first)
                    344: #define        M_ptr           LCL(M_ptr)
                    345: #define        M_next          LCL(M_next)
                    346: #define        M_nfree         LCL(M_nfree)
                    347: #define        M_nalloc        LCL(M_nalloc)
                    348: #define        M_num           LCL(M_num)
                    349: #define        M_size          LCL(M_size)
                    350: 
                    351: /*
                    352:  * Prof data type.
                    353:  */
                    354: 
                    355: LCL(P_addr)            = 0                     /* function address */
                    356: LCL(P_count)           = LCL(P_addr)+4         /* # times function called */
                    357: LCL(P_overflow)                = LCL(P_count)+4        /* # times count overflowed */
                    358: LCL(P_size)            = LCL(P_overflow)+4     /* size of prof data type */
                    359: 
                    360: #define        P_addr          LCL(P_addr)
                    361: #define        P_count         LCL(P_count)
                    362: #define        P_overflow      LCL(P_overflow)
                    363: #define        P_size          LCL(P_size)
                    364: 
                    365: /*
                    366:  * Gprof data type.
                    367:  */
                    368: 
                    369: LCL(G_next)            = 0                     /* next hash link (must be 0) */
                    370: LCL(G_frompc)          = LCL(G_next)+4         /* caller's caller */
                    371: LCL(G_selfpc)          = LCL(G_frompc)+4       /* caller's address */
                    372: LCL(G_count)           = LCL(G_selfpc)+4       /* # times arc traversed */
                    373: LCL(G_overflow)                = LCL(G_count)+4        /* # times count overflowed */
                    374: LCL(G_size)            = LCL(G_overflow)+4     /* size of gprof data type */
                    375: 
                    376: #define        G_next          LCL(G_next)
                    377: #define        G_frompc        LCL(G_frompc)
                    378: #define        G_selfpc        LCL(G_selfpc)
                    379: #define        G_count         LCL(G_count)
                    380: #define        G_overflow      LCL(G_overflow)
                    381: #define        G_size          LCL(G_size)
                    382: 
                    383: /*
                    384:  * Gprof header.
                    385:  *
                    386:  * At least one header is allocated for each unique function that is profiled.
                    387:  * In order to save time calculating the hash value, the last H_maxcache
                    388:  * distinct arcs are cached within this structure.  Also, to avoid loading
                    389:  * the GOT when searching the hash table, we copy the hash pointer to this
                    390:  * structure, so that we only load the GOT when we need to allocate an arc.
                    391:  */
                    392: 
                    393: LCL(H_maxcache)                = 3                     /* # of cache table entries */
                    394: LCL(H_csize)           = 4*LCL(H_maxcache)     /* size of each cache array */
                    395: 
                    396: LCL(H_hash_ptr)                = 0                     /* hash table to use */
                    397: LCL(H_unique_ptr)      = LCL(H_hash_ptr)+4     /* function unique pointer */
                    398: LCL(H_prof)            = LCL(H_unique_ptr)+4   /* prof statistics */
                    399: LCL(H_cache_ptr)       = LCL(H_prof)+P_size    /* cache table of element pointers */
                    400: LCL(H_size)            = LCL(H_cache_ptr)+LCL(H_csize) /* size of gprof header type */
                    401: 
                    402: #define        H_maxcache      LCL(H_maxcache)
                    403: #define        H_csize         LCL(H_csize)
                    404: #define        H_hash_ptr      LCL(H_hash_ptr)
                    405: #define        H_unique_ptr    LCL(H_unique_ptr)
                    406: #define        H_prof          LCL(H_prof)
                    407: #define        H_cache_ptr     LCL(H_cache_ptr)
                    408: #define        H_size          LCL(H_size)
                    409: 
                    410: /*
                    411:  * Number of digits needed to write a 64 bit number including trailing null.
                    412:  * (rounded up to be divisable by 4).
                    413:  */
                    414: 
                    415: #define N_digit                24
                    416: 
                    417: 
                    418:        .data
                    419: 
                    420: /*
                    421:  * Default gprof hash table size, which must be a power of two.
                    422:  * The shift specifies how many low order bits to eliminate when
                    423:  * calculating the hash value.
                    424:  */
                    425: 
                    426: #ifndef GPROF_HASH_SIZE
                    427: #define GPROF_HASH_SIZE 16384
                    428: #endif
                    429: 
                    430: #ifndef GPROF_HASH_SHIFT
                    431: #define        GPROF_HASH_SHIFT 9
                    432: #endif
                    433: 
                    434: #define GPROF_HASH_MASK (GPROF_HASH_SIZE-1)
                    435: 
                    436: DATA(_profile_hash_size)
                    437:        .long   GPROF_HASH_SIZE
                    438: ENDDATA(_profile_hash_size)
                    439: 
                    440: 
                    441: 
                    442: /*
                    443:  * Pointer that the compiler uses to call to the appropriate mcount function.
                    444:  */
                    445: 
                    446: DATA(_mcount_ptr)
                    447:        .long   EXT(_dummy_mcount)
                    448: ENDDATA(_mcount_ptr)
                    449: 
                    450: /*
                    451:  * Global profile variables.  The structure that accesses this in C is declared
                    452:  * in profile-internal.h.  All items in .data that follow this will be used as
                    453:  * one giant record, and each unique machine, thread, kgmon output or what have
                    454:  * you will create a separate instance.  Typically there is only one instance
                    455:  * which will be the memory laid out below.
                    456:  */
                    457: 
                    458: LCL(var_major_version) = 0                             /* major version number */
                    459: LCL(var_minor_version) = LCL(var_major_version)+4      /* minor version number */
                    460: LCL(vars_size)         = LCL(var_minor_version)+4      /* size of _profile_vars structure */
                    461: LCL(plist_size)                = LCL(vars_size)+4              /* size of page_list structure */
                    462: LCL(acontext_size)     = LCL(plist_size)+4             /* size of allocation contexts */
                    463: LCL(callback_size)     = LCL(acontext_size)+4          /* size of callback structure */
                    464: LCL(type)              = LCL(callback_size)+4          /* profile type (gprof, prof) */
                    465: LCL(error_msg)         = LCL(type)+4                   /* error message for perror */
                    466: LCL(filename)          = LCL(error_msg)+4              /* filename to write to */
                    467: LCL(str_ptr)           = LCL(filename)+4               /* string table pointer */
                    468: LCL(stream)            = LCL(str_ptr)+4                /* stdio stream to write to */
                    469: LCL(diag_stream)       = LCL(stream)+4                 /* stdio stream to write diagnostics to */
                    470: LCL(fwrite_func)       = LCL(diag_stream)+4            /* function like fwrite to output bytes */
                    471: LCL(page_size)         = LCL(fwrite_func)+4            /* page size in bytes */
                    472: LCL(str_bytes)         = LCL(page_size)+4              /* # bytes in string table */
                    473: LCL(str_total)         = LCL(str_bytes)+4              /* # total bytes allocated for string table */
                    474: LCL(clock_ticks)       = LCL(str_total)+4              /* # clock ticks per second */
                    475: 
                    476:                                                        /* profil variables */
                    477: LCL(profil_start)      = LCL(clock_ticks)+4            /* start of profil variables */
                    478: LCL(lowpc)             = LCL(clock_ticks)+4            /* lowest address */
                    479: LCL(highpc)            = LCL(lowpc)+4                  /* highest address */
                    480: LCL(text_len)          = LCL(highpc)+4                 /* highpc-lowpc */
                    481: LCL(profil_len)                = LCL(text_len)+4               /* size of profil buffer */
                    482: LCL(counter_size)      = LCL(profil_len)+4             /* size of indivual counter */
                    483: LCL(scale)             = LCL(counter_size)+4           /* scale factor */
                    484: LCL(profil_unused)     = LCL(scale)+4                  /* unused fields */
                    485: LCL(profil_end)                = LCL(profil_unused)+4*8        /* end of profil_info structure */
                    486: LCL(profil_buf)                = LCL(profil_end)               /* buffer for profil */
                    487: 
                    488:                                                        /* Output selection func ptrs */
                    489: LCL(output_init)       = LCL(profil_buf)+4             /* Initialization */
                    490: LCL(output)            = LCL(output_init)+4            /* Write out profiling info */
                    491: LCL(output_ptr)                = LCL(output)+4                 /* Output specific data ptr */
                    492: 
                    493:                                                        /* Memory allocation support */
                    494: LCL(acontext)          = LCL(output_ptr)+4             /* pointers to allocation context blocks */
                    495: 
                    496: LCL(bogus_func)                = LCL(acontext)+4*C_max         /* function to use if gprof arc is bad */
                    497: LCL(vars_unused)       = LCL(bogus_func)+4             /* future growth */
                    498: 
                    499:                                                        /* flags */
                    500: LCL(init)              = LCL(vars_unused)+4*63         /* whether initializations were done */
                    501: LCL(active)            = LCL(init)+1                   /* whether profiling is active */
                    502: LCL(do_profile)                = LCL(active)+1                 /* whether to do profiling */
                    503: LCL(use_dci)           = LCL(do_profile)+1             /* whether to use DCI */
                    504: LCL(use_profil)                = LCL(use_dci)+1                /* whether to use profil */
                    505: LCL(recursive_alloc)   = LCL(use_profil)+1             /* alloc called recursively */
                    506: LCL(output_uarea)      = LCL(recursive_alloc)+1        /* output uarea */
                    507: LCL(output_stats)      = LCL(output_uarea)+1           /* output stats info */
                    508: LCL(output_clock)      = LCL(output_stats)+1           /* output the clock ticks */
                    509: LCL(multiple_sections) = LCL(output_clock)+1           /* multiple sections are ok */
                    510: LCL(have_bb)           = LCL(multiple_sections)+1      /* whether we have basic block data */
                    511: LCL(init_format)       = LCL(have_bb)+1                /* The output format has been chosen */
                    512: LCL(debug)             = LCL(init_format)+1            /* Whether or not we are debugging */
                    513: LCL(check_funcs)       = LCL(debug)+1                  /* Whether to check functions for validity */
                    514: LCL(flag_unused)       = LCL(check_funcs)+1            /* unused flags */
                    515: LCL(end_of_vars)       = LCL(flag_unused)+62           /* size of machine independent vars */
                    516: 
                    517: /*
                    518:  * Data that contains profile statistics that can be dumped out
                    519:  * into the {,g}mon.out file.  This is defined in profile-md.h.
                    520:  */
                    521: 
                    522: LCL(stats_start)       = LCL(end_of_vars)              /* start of stats substructure */
                    523: LCL(stats_major_version)= LCL(stats_start)             /* major version number */
                    524: LCL(stats_minor_version)= LCL(stats_major_version)+4   /* minor version number */
                    525: LCL(stats_size)                = LCL(stats_minor_version)+4    /* size of _profile_stats structure */
                    526: LCL(profil_buckets)    = LCL(stats_size)+4             /* # profil buckets */
                    527: LCL(my_cpu)            = LCL(profil_buckets)+4         /* identify which cpu/thread this is */
                    528: LCL(max_cpu)           = LCL(my_cpu)+4                 /* identify which cpu/thread this is */
                    529: LCL(prof_records)      = LCL(max_cpu)+4                /* # of profiled functions */
                    530: LCL(gprof_records)     = LCL(prof_records)+4           /* # of gprof arcs created */
                    531: LCL(hash_buckets)      = LCL(gprof_records)+4          /* max gprof hash buckets on a chain */
                    532: LCL(bogus_count)       = LCL(hash_buckets)+4           /* # bogus functions found in gprof */
                    533: 
                    534: LCL(cnt)               = LCL(bogus_count)+4            /* # of _{prof,gprof}_mcount calls */
                    535: LCL(dummy)             = LCL(cnt)+8                    /* # of _dummy_mcount calls */
                    536: LCL(old_mcount)                = LCL(dummy)+8                  /* # of old mcount calls */
                    537: LCL(hash_search)       = LCL(old_mcount)+8             /* # gprof hash buckets searched */
                    538: LCL(hash_num)          = LCL(hash_search)+8            /* # times hash table searched */
                    539: LCL(user_ticks)                = LCL(hash_num)+8               /* # ticks within user space */
                    540: LCL(kernel_ticks)      = LCL(user_ticks)+8             /* # ticks within kernel space */
                    541: LCL(idle_ticks)                = LCL(kernel_ticks)+8           /* # ticks cpu was idle */
                    542: LCL(overflow_ticks)    = LCL(idle_ticks)+8             /* # ticks where histcounter overflowed */
                    543: LCL(acontext_locked)   = LCL(overflow_ticks)+8         /* # times an acontext was locked */
                    544: LCL(too_low)           = LCL(acontext_locked)+8        /* # times histogram tick too low */
                    545: LCL(too_high)          = LCL(too_low)+8                /* # times histogram tick too low */
                    546: LCL(prof_overflow)     = LCL(too_high)+8               /* # times the prof count field overflowed */
                    547: LCL(gprof_overflow)    = LCL(prof_overflow)+8          /* # times the gprof count field overflowed */
                    548: LCL(num_alloc)         = LCL(gprof_overflow)+8         /* # allocations in each context */
                    549: LCL(bytes_alloc)       = LCL(num_alloc)+4*C_max        /* bytes allocated in each context */
                    550: LCL(num_context)       = LCL(bytes_alloc)+4*C_max      /* # allocation context blocks */
                    551: LCL(wasted)            = LCL(num_context)+4*C_max      /* # bytes wasted */
                    552: LCL(overhead)          = LCL(wasted)+4*C_max           /* # bytes of overhead */
                    553: LCL(buckets)           = LCL(overhead)+4*C_max         /* # hash indexes that have n buckets */
                    554: LCL(cache_hits1)       = LCL(buckets)+4*10             /* # gprof cache hits in bucket #1 */
                    555: LCL(cache_hits2)       = LCL(cache_hits1)+8            /* # gprof cache hits in bucket #2 */
                    556: LCL(cache_hits3)       = LCL(cache_hits2)+8            /* # gprof cache hits in bucket #3 */
                    557: LCL(stats_unused)      = LCL(cache_hits3)+8            /* reserved for future use */
                    558: LCL(stats_end)         = LCL(stats_unused)+8*64        /* end of stats structure */
                    559: 
                    560: /*
                    561:  * Machine dependent variables that no C file should access (except for
                    562:  * profile-md.c).
                    563:  */
                    564: 
                    565: LCL(md_start)          = LCL(stats_end)                /* start of md structure */
                    566: LCL(md_major_version)  = LCL(md_start)                 /* major version number */
                    567: LCL(md_minor_version)  = LCL(md_major_version)+4       /* minor version number */
                    568: LCL(md_size)           = LCL(md_minor_version)+4       /* size of _profile_stats structure */
                    569: LCL(hash_ptr)          = LCL(md_size)+4                /* gprof hash pointer */
                    570: LCL(hash_size)         = LCL(hash_ptr)+4               /* gprof hash size */
                    571: LCL(num_cache)         = LCL(hash_size)+4              /* # of cache entries */
                    572: LCL(save_mcount_ptr)   = LCL(num_cache)+4              /* save for mcount_ptr when suspending profiling */
                    573: LCL(mcount_ptr_ptr)    = LCL(save_mcount_ptr)+4        /* pointer to _mcount_ptr */
                    574: LCL(dummy_ptr)         = LCL(mcount_ptr_ptr)+4         /* pointer to gprof_dummy */
                    575: LCL(alloc_pages)       = LCL(dummy_ptr)+4              /* allocate more memory */
                    576: LCL(num_buffer)                = LCL(alloc_pages)+4            /* buffer to convert 64 bit ints in */
                    577: LCL(md_unused)         = LCL(num_buffer)+N_digit       /* unused fields */
                    578: LCL(md_end)            = LCL(md_unused)+4*58           /* end of md structure */
                    579: LCL(total_size)                = LCL(md_end)                   /* size of entire structure */
                    580: 
                    581: /*
                    582:  * Size of the entire _profile_vars structure.
                    583:  */
                    584: 
                    585: DATA(_profile_size)
                    586:        .long   LCL(total_size)
                    587: ENDDATA(_profile_size)
                    588: 
                    589: /*
                    590:  * Size of the statistics substructure.
                    591:  */
                    592: 
                    593: DATA(_profile_stats_size)
                    594:        .long   LCL(stats_end)-LCL(stats_start)
                    595: ENDDATA(_profile_stats_size)
                    596: 
                    597: /*
                    598:  * Size of the profil info substructure.
                    599:  */
                    600: 
                    601: DATA(_profile_profil_size)
                    602:        .long   LCL(profil_end)-LCL(profil_start)
                    603: ENDDATA(_profile_profil_size)
                    604: 
                    605: /*
                    606:  * Size of the machine dependent substructure.
                    607:  */
                    608: 
                    609: DATA(_profile_md_size)
                    610:        .long   LCL(md_end)-LCL(md_start)
                    611: ENDDATA(_profile_profil_size)
                    612: 
                    613: /*
                    614:  * Whether statistics are supported.
                    615:  */
                    616: 
                    617: DATA(_profile_do_stats)
                    618:        .long   DO_STATS
                    619: ENDDATA(_profile_do_stats)
                    620: 
                    621:        .text
                    622: 
                    623: /*
                    624:  * Map LCL(xxx) -> into simpler names
                    625:  */
                    626: 
                    627: #define        V_acontext              LCL(acontext)
                    628: #define        V_acontext_locked       LCL(acontext_locked)
                    629: #define        V_alloc_pages           LCL(alloc_pages)
                    630: #define        V_bogus_func            LCL(bogus_func)
                    631: #define        V_bytes_alloc           LCL(bytes_alloc)
                    632: #define        V_cache_hits1           LCL(cache_hits1)
                    633: #define        V_cache_hits2           LCL(cache_hits2)
                    634: #define        V_cache_hits3           LCL(cache_hits3)
                    635: #define        V_cnt                   LCL(cnt)
                    636: #define        V_cnt_overflow          LCL(cnt_overflow)
                    637: #define        V_check_funcs           LCL(check_funcs)
                    638: #define        V_dummy                 LCL(dummy)
                    639: #define        V_dummy_overflow        LCL(dummy_overflow)
                    640: #define        V_dummy_ptr             LCL(dummy_ptr)
                    641: #define        V_gprof_records         LCL(gprof_records)
                    642: #define        V_hash_num              LCL(hash_num)
                    643: #define        V_hash_ptr              LCL(hash_ptr)
                    644: #define        V_hash_search           LCL(hash_search)
                    645: #define        V_mcount_ptr_ptr        LCL(mcount_ptr_ptr)
                    646: #define        V_num_alloc             LCL(num_alloc)
                    647: #define        V_num_buffer            LCL(num_buffer)
                    648: #define        V_num_context           LCL(num_context)
                    649: #define        V_old_mcount            LCL(old_mcount)
                    650: #define        V_old_mcount_overflow   LCL(old_mcount_overflow)
                    651: #define        V_overhead              LCL(overhead)
                    652: #define        V_page_size             LCL(page_size)
                    653: #define        V_prof_records          LCL(prof_records)
                    654: #define        V_recursive_alloc       LCL(recursive_alloc)
                    655: #define        V_wasted                LCL(wasted)
                    656: 
                    657: /*
                    658:  * Loadup %ebx with the address of _profile_vars.  On a multiprocessor, this
                    659:  * will loads up the appropriate machine's _profile_vars structure.
                    660:  * For ELF shared libraries, rely on the fact that we won't need a GOT,
                    661:  * except to load this pointer.
                    662:  */
                    663: 
                    664: #if defined (MACH_KERNEL) && NCPUS > 1
                    665: #define ASSEMBLER
                    666: #if AT386
                    667: #include <i386/AT386/mp.h>
                    668: #endif
                    669: 
                    670: #if SQT
                    671: #include <i386/SQT/asm_macros.h>
                    672: #endif
                    673: 
                    674: #ifndef CPU_NUMBER
                    675: #error "Cannot determine how to get CPU number"
                    676: #endif
                    677: 
                    678: #define Vload  CPU_NUMBER(%ebx); movl EXT(_profile_vars_cpus)(,%ebx,4),%ebx
                    679: 
                    680: #else  /* not kernel or not multiprocessor */
                    681: #define        Vload   Gload; Egaddr(%ebx,_profile_vars)
                    682: #endif
                    683: 
                    684: 
                    685: /*
                    686:  * Allocate some memory for profiling.  This memory is guaranteed to
                    687:  * be zero.
                    688:  * %eax contains the memory size requested and will contain ptr on exit.
                    689:  * %ebx contains the address of the appropriate profile_vars structure.
                    690:  * %ecx is the number of the memory pool to allocate from (trashed on exit).
                    691:  * %edx is trashed.
                    692:  * %esi is preserved.
                    693:  * %edi is preserved.
                    694:  * %ebp is preserved.
                    695:  */
                    696: 
                    697: Entry(_profile_alloc_asm)
                    698:        ENTER
                    699:        pushl   %esi
                    700:        pushl   %edi
                    701: 
                    702:        movl    %ecx,%edi                       /* move context number to saved reg */
                    703: 
                    704: #if NO_RECURSIVE_ALLOC
                    705:        movb    $-1,%cl
                    706:        xchgb   %cl,V_recursive_alloc(%ebx)
                    707:        cmpb    $0,%cl
                    708:        je      LCL(no_recurse)
                    709: 
                    710:        int     $3
                    711: 
                    712:        .align  ALIGN
                    713: LCL(no_recurse):
                    714: #endif
                    715: 
                    716:        leal    V_acontext(%ebx,%edi,4),%ecx
                    717: 
                    718:        /* Loop looking for a free allocation context. */
                    719:        /* %eax = size, %ebx = vars addr, %ecx = ptr to allocation context to try */
                    720:        /* %edi = context number */
                    721: 
                    722:        .align  ALIGN
                    723: LCL(alloc_loop):
                    724:        movl    %ecx,%esi                       /* save ptr in case no more contexts */
                    725:        movl    A_next(%ecx),%ecx               /* next context block */
                    726:        cmpl    $0,%ecx
                    727:        je      LCL(alloc_context)              /* need to allocate a new context block */
                    728: 
                    729:        movl    $-1,%edx
                    730:        xchgl   %edx,A_lock(%ecx)               /* %edx == 0 if context available */
                    731: 
                    732: #if DO_STATS
                    733:        SDADDNEG(%edx,V_acontext_locked(%ebx))  /* increment counter if lock was held */
                    734: #endif
                    735: 
                    736:        cmpl    $0,%edx
                    737:        jne     LCL(alloc_loop)                 /* go back if this context block is not available */
                    738: 
                    739:        /* Allocation context found (%ecx), now allocate. */
                    740:        movl    A_plist(%ecx),%edx              /* pointer to current block */
                    741:        cmpl    $0,%edx                         /* first allocation? */
                    742:        je      LCL(alloc_new)
                    743: 
                    744:        cmpl    %eax,M_nfree(%edx)              /* see if we have enough space */
                    745:        jl      LCL(alloc_new)                  /* jump if not enough space */
                    746: 
                    747:        /* Allocate from local block (and common exit) */
                    748:        /* %eax = bytes to allocate, %ebx = GOT, %ecx = context, %edx = memory block */
                    749:        /* %edi = context number */
                    750: 
                    751:        .align  ALIGN
                    752: LCL(alloc_ret):
                    753: 
                    754: #if DO_STATS
                    755:        SLOCK incl V_num_alloc(%ebx,%edi,4)     /* update global counters */
                    756:        SLOCK addl %eax,V_bytes_alloc(%ebx,%edi,4)
                    757:        SLOCK subl %eax,V_wasted(%ebx,%edi,4)
                    758: #endif
                    759: 
                    760:        movl    M_ptr(%edx),%esi                /* pointer return value */
                    761:        subl    %eax,M_nfree(%edx)              /* decrement bytes remaining */
                    762:        addl    %eax,M_nalloc(%edx)             /* increment bytes allocated */
                    763:        incl    M_num(%edx)                     /* increment # allocations */
                    764:        addl    %eax,M_ptr(%edx)                /* advance pointer */
                    765:        movl    $0,A_lock(%ecx)                 /* unlock context block */
                    766:        movl    %esi,%eax                       /* return pointer */
                    767: 
                    768: #if NO_RECURSIVE_ALLOC
                    769:        movb    $0,V_recursive_alloc(%ebx)
                    770: #endif
                    771: 
                    772:        popl    %edi
                    773:        popl    %esi
                    774:        LEAVE0
                    775:        ret                                     /* return to the caller */
                    776: 
                    777:        /* Allocate space in whole number of pages */
                    778:        /* %eax = bytes to allocate, %ebx = vars address, %ecx = context */
                    779:        /* %edi = context number */
                    780: 
                    781:        .align  ALIGN
                    782: LCL(alloc_new):
                    783:        pushl   %eax                            /* save regs */
                    784:        pushl   %ecx
                    785:        movl    V_page_size(%ebx),%edx
                    786:        addl    $(M_size-1),%eax                /* add in overhead size & subtract 1 */
                    787:        decl    %edx                            /* page_size - 1 */
                    788:        addl    %edx,%eax                       /* round up to whole number of pages */
                    789:        notl    %edx
                    790:        andl    %edx,%eax
                    791:        leal    -M_size(%eax),%esi              /* save allocation size */
                    792:        pushl   %eax                            /* argument to _profile_alloc_pages */
                    793:        call    *V_alloc_pages(%ebx)            /* allocate some memory */
                    794:        addl    $4,%esp                         /* pop off argument */
                    795: 
                    796: #if DO_STATS
                    797:        SLOCK addl %esi,V_wasted(%ebx,%edi,4)   /* udpate global counters */
                    798:        SLOCK addl $M_size,V_overhead(%ebx,%edi,4)
                    799: #endif
                    800: 
                    801:        popl    %ecx                            /* context block */
                    802:        movl    %eax,%edx                       /* memory block pointer */
                    803:        movl    %esi,M_nfree(%edx)              /* # free bytes */
                    804:        addl    $M_size,%eax                    /* bump past overhead */
                    805:        movl    A_plist(%ecx),%esi              /* previous memory block or 0 */
                    806:        movl    %eax,M_first(%edx)              /* first space available */
                    807:        movl    %eax,M_ptr(%edx)                /* current address available */
                    808:        movl    %esi,M_next(%edx)               /* next memory block allocated */
                    809:        movl    %edx,A_plist(%ecx)              /* update current page list */
                    810:        popl    %eax                            /* user size request */
                    811:        jmp     LCL(alloc_ret)                  /* goto common return code */
                    812: 
                    813:        /* Allocate a context header in addition to memory block header + data */
                    814:        /* %eax = bytes to allocate, %ebx = GOT, %esi = ptr to store context ptr */
                    815:        /* %edi = context number */
                    816: 
                    817:        .align  ALIGN
                    818: LCL(alloc_context):
                    819:        pushl   %eax                            /* save regs */
                    820:        pushl   %esi
                    821:        movl    V_page_size(%ebx),%edx
                    822:        addl    $(A_size+M_size-1),%eax         /* add in overhead size & subtract 1 */
                    823:        decl    %edx                            /* page_size - 1 */
                    824:        addl    %edx,%eax                       /* round up to whole number of pages */
                    825:        notl    %edx
                    826:        andl    %edx,%eax
                    827:        leal    -A_size-M_size(%eax),%esi       /* save allocation size */
                    828:        pushl   %eax                            /* argument to _profile_alloc_pages */
                    829:        call    *V_alloc_pages(%ebx)            /* allocate some memory */
                    830:        addl    $4,%esp                         /* pop off argument */
                    831: 
                    832: #if DO_STATS
                    833:        SLOCK incl V_num_context(%ebx,%edi,4)   /* bump # context blocks */
                    834:        SLOCK addl %esi,V_wasted(%ebx,%edi,4)   /* update global counters */
                    835:        SLOCK addl $(A_size+M_size),V_overhead(%ebx,%edi,4)
                    836: #endif
                    837: 
                    838:        movl    %eax,%ecx                       /* context pointer */
                    839:        leal    A_size(%eax),%edx               /* memory block pointer */
                    840:        movl    %esi,M_nfree(%edx)              /* # free bytes */
                    841:        addl    $(A_size+M_size),%eax           /* bump past overhead */
                    842:        movl    %eax,M_first(%edx)              /* first space available */
                    843:        movl    %eax,M_ptr(%edx)                /* current address available */
                    844:        movl    $0,M_next(%edx)                 /* next memory block allocated */
                    845:        movl    %edx,A_plist(%ecx)              /* head of memory block list */
                    846:        movl    $1,A_lock(%ecx)                 /* set lock */
                    847:        popl    %esi                            /* ptr to store context block link */
                    848:        movl    %ecx,%eax                       /* context pointer temp */
                    849:        xchgl   %eax,A_next(%esi)               /* link into chain */
                    850:        movl    %eax,A_next(%ecx)               /* add links in case of threading */
                    851:        popl    %eax                            /* user size request */
                    852:        jmp     LCL(alloc_ret)                  /* goto common return code */
                    853: 
                    854: END(_profile_alloc_asm)
                    855: 
                    856: /*
                    857:  * C callable version of the profile memory allocator.
                    858:  * extern void *_profile_alloc(struct profile_vars *, size_t, acontext_type_t);
                    859: */
                    860: 
                    861: Entry(_profile_alloc)
                    862:        ENTER
                    863:        pushl   %ebx
                    864:        movl    12+Estack(%esp),%eax            /* memory size */
                    865:        movl    8+Estack(%esp),%ebx             /* provile_vars address */
                    866:        addl    $3,%eax                         /* round up to word boundary */
                    867:        movl    16+Estack(%esp),%ecx            /* which memory pool to allocate from */
                    868:        andl    $0xfffffffc,%eax
                    869:        call    EXT(_profile_alloc_asm)
                    870:        popl    %ebx
                    871:        LEAVE0
                    872:        ret
                    873: END(_profile_alloc)
                    874: 
                    875: 
                    876: /*
                    877:  * Dummy mcount routine that just returns.
                    878:  *
                    879:  *             +-------------------------------+
                    880:  *             |                               |
                    881:  *             |                               |
                    882:  *             | caller's caller stack,        |
                    883:  *             | saved registers, params.      |
                    884:  *             |                               |
                    885:  *             |                               |
                    886:  *             +-------------------------------+
                    887:  *             | caller's caller return addr.  |
                    888:  *             +-------------------------------+
                    889:  *     esp --> | caller's return address       |
                    890:  *             +-------------------------------+
                    891:  *
                    892:  *     edx --> function unqiue LCL
                    893:  */
                    894: 
                    895: Entry(_dummy_mcount)
                    896:        ENTER
                    897: 
                    898: #if DO_STATS
                    899:        pushl   %ebx
                    900:        MP_DISABLE_PREEMPTION(%ebx)
                    901:        Vload
                    902:        SDINC(V_dummy(%ebx))
                    903:        MP_ENABLE_PREEMPTION(%ebx)
                    904:        popl    %ebx
                    905: #endif
                    906: 
                    907:        LEAVE0
                    908:        ret
                    909: END(_dummy_mcount)
                    910: 
                    911: 
                    912: /*
                    913:  * Entry point for System V based profiling, count how many times each function
                    914:  * is called.  The function label is passed in %edx, and the top two words on
                    915:  * the stack are the caller's address, and the caller's return address.
                    916:  *
                    917:  *             +-------------------------------+
                    918:  *             |                               |
                    919:  *             |                               |
                    920:  *             | caller's caller stack,        |
                    921:  *             | saved registers, params.      |
                    922:  *             |                               |
                    923:  *             |                               |
                    924:  *             +-------------------------------+
                    925:  *             | caller's caller return addr.  |
                    926:  *             +-------------------------------+
                    927:  *     esp --> | caller's return address       |
                    928:  *             +-------------------------------+
                    929:  *
                    930:  *     edx --> function unique label
                    931:  *
                    932:  * We don't worry about the possibility about two threads calling
                    933:  * the same function for the first time simulataneously.  If that
                    934:  * happens, two records will be created, and one of the records
                    935:  * address will be stored in in the function unique label (which
                    936:  * is aligned by the compiler, so we don't have to watch out for
                    937:  * crossing page/cache boundaries).
                    938:  */
                    939: 
                    940: Entry(_prof_mcount)
                    941:        ENTER
                    942: 
                    943: #if DO_STATS
                    944:        pushl   %ebx
                    945:        MP_DISABLE_PREEMPTION(%ebx)
                    946:        Vload
                    947:        SDINC(V_cnt(%ebx))
                    948: #endif
                    949: 
                    950:        movl    (%edx),%eax                     /* initialized? */
                    951:        cmpl    $0,%eax
                    952:        je      LCL(pnew)
                    953: 
                    954:        DINC2(P_count(%eax),P_overflow(%eax))   /* bump function count (double precision) */
                    955: 
                    956: #if DO_STATS
                    957:        MP_ENABLE_PREEMPTION(%ebx)
                    958:        popl    %ebx
                    959: #endif
                    960: 
                    961:        LEAVE0
                    962:        ret
                    963: 
                    964:        .align  ALIGN
                    965: LCL(pnew):
                    966: 
                    967: #if !DO_STATS
                    968:        pushl   %ebx
                    969:        MP_DISABLE_PREEMPTION(%ebx)
                    970:        Vload
                    971: #endif
                    972: 
                    973:        SLOCK incl V_prof_records(%ebx)
                    974:        pushl   %edx
                    975:        movl    $P_size,%eax                    /* allocation size */
                    976:        movl    $C_prof,%ecx                    /* allocation pool */
                    977:        call    EXT(_profile_alloc_asm)         /* allocate a new record */
                    978:        popl    %edx
                    979: 
                    980:        movl    Estack+4(%esp),%ecx             /* caller's address */
                    981:        movl    %ecx,P_addr(%eax)
                    982:        movl    $1,P_count(%eax)                /* call count */
                    983:        xchgl   %eax,(%edx)                     /* update function header */
                    984:        MP_ENABLE_PREEMPTION(%ebx)
                    985:        popl    %ebx
                    986:        LEAVE0
                    987:        ret
                    988: 
                    989: END(_prof_mcount)
                    990: 
                    991: 
                    992: /*
                    993:  * Entry point for BSD based graph profiling, count how many times each unique
                    994:  * call graph (caller + callee) is called.  The function label is passed in
                    995:  * %edx, and the top two words on the stack are the caller's address, and the
                    996:  * caller's return address.
                    997:  *
                    998:  *             +-------------------------------+
                    999:  *             |                               |
                   1000:  *             |                               |
                   1001:  *             | caller's caller stack,        |
                   1002:  *             | saved registers, params.      |
                   1003:  *             |                               |
                   1004:  *             |                               |
                   1005:  *             +-------------------------------+
                   1006:  *             | caller's caller return addr.  |
                   1007:  *             +-------------------------------+
                   1008:  *     esp --> | caller's return address       |
                   1009:  *             +-------------------------------+
                   1010:  *
                   1011:  *     edx --> function unqiue label
                   1012:  *
                   1013:  * We don't worry about the possibility about two threads calling the same
                   1014:  * function simulataneously.  If that happens, two records will be created, and
                   1015:  * one of the records address will be stored in in the function unique label
                   1016:  * (which is aligned by the compiler).
                   1017:  *
                   1018:  * By design, the gprof header is not locked.  Each of the cache pointers is
                   1019:  * always a valid pointer (possibily to a null record), and if another thread
                   1020:  * comes in and modifies the pointer, it does so automatically with a simple store.
                   1021:  * Since all arcs are in the hash table, the caches are just to avoid doing
                   1022:  * a multiplication in the common case, and if they don't match, the arcs will
                   1023:  * still be found.
                   1024:  */
                   1025: 
                   1026: Entry(_gprof_mcount)
                   1027: 
                   1028:        ENTER
                   1029:        movl    Estack+4(%esp),%ecx             /* caller's caller address */
                   1030: 
                   1031: #if DO_STATS
                   1032:        pushl   %ebx
                   1033:        MP_DISABLE_PREEMPTION(%ebx)
                   1034:        Vload
                   1035:        SDINC(V_cnt(%ebx))                      /* bump profile call counter (double int) */
                   1036: #endif
                   1037: 
                   1038:        movl    (%edx),%eax                     /* Gprof header allocated? */
                   1039:        cmpl    $0,%eax
                   1040:        je      LCL(gnew)                       /* skip if first call */
                   1041: 
                   1042:        DINC2(H_prof+P_count(%eax),H_prof+P_overflow(%eax))     /* bump function count */
                   1043: 
                   1044:        /* See if this call arc is the same as the last time */
                   1045: MARK(_gprof_mcount_cache1)
                   1046:        movl    H_cache_ptr(%eax),%edx          /* last arc searched */
                   1047:        cmpl    %ecx,G_frompc(%edx)             /* skip if not equal */
                   1048:        jne     LCL(gcache2)
                   1049: 
                   1050:        /* Same as last time, increment and return */
                   1051: 
                   1052:        DINC2(G_count(%edx),G_overflow(%edx))   /* bump arc count */
                   1053: 
                   1054: #if DO_STATS
                   1055:        SDINC(V_cache_hits1(%ebx))              /* update counter */
                   1056:        MP_ENABLE_PREEMPTION(%ebx)
                   1057:        popl    %ebx
                   1058: #endif
                   1059: 
                   1060:        LEAVE0
                   1061:        ret
                   1062: 
                   1063:        /* Search second cache entry */
                   1064:        /* %eax = gprof func header, %ebx = vars address if DO_STATS, %ecx = caller's caller */
                   1065:        /* %edx = first arc searched */
                   1066:        /* %ebx if DO_STATS pushed on stack */
                   1067: 
                   1068:        .align  ALIGN
                   1069: MARK(_gprof_mcount_cache2)
                   1070: LCL(gcache2):
                   1071:        pushl   %esi                            /* get a saved register */
                   1072:        movl    H_cache_ptr+4(%eax),%esi        /* 2nd arc to be searched */
                   1073:        cmpl    %ecx,G_frompc(%esi)             /* skip if not equal */
                   1074:        jne     LCL(gcache3)
                   1075: 
                   1076:        /* Element found, increment, reset last arc searched and return */
                   1077: 
                   1078:        DINC2(G_count(%esi),G_overflow(%esi))   /* bump arc count */
                   1079: 
                   1080:        movl    %esi,H_cache_ptr+0(%eax)        /* swap 1st and 2nd cached arcs */
                   1081:        popl    %esi
                   1082:        movl    %edx,H_cache_ptr+4(%eax)
                   1083: 
                   1084: #if DO_STATS
                   1085:        SDINC(V_cache_hits2(%ebx))              /* update counter */
                   1086:        MP_ENABLE_PREEMPTION(%ebx)
                   1087:        popl    %ebx
                   1088: #endif
                   1089: 
                   1090:        LEAVE0
                   1091:        ret
                   1092: 
                   1093:        /* Search third cache entry */
                   1094:        /* %eax = gprof func header, %ebx = vars address if DO_STATS, %ecx = caller's caller */
                   1095:        /* %edx = first arc searched, %esi = second arc searched */
                   1096:        /* %esi, %ebx if DO_STATS pushed on stack */
                   1097: 
                   1098:        .align  ALIGN
                   1099: MARK(_gprof_mcount_cache3)
                   1100: LCL(gcache3):
                   1101:        pushl   %edi
                   1102:        movl    H_cache_ptr+8(%eax),%edi        /* 3rd arc to be searched */
                   1103:        cmpl    %ecx,G_frompc(%edi)             /* skip if not equal */
                   1104:        jne     LCL(gnocache)
                   1105: 
                   1106:        /* Element found, increment, reset last arc searched and return */
                   1107: 
                   1108:        DINC2(G_count(%edi),G_overflow(%edi))   /* bump arc count */
                   1109: 
                   1110:        movl    %edi,H_cache_ptr+0(%eax)        /* make this 1st cached arc */
                   1111:        movl    %esi,H_cache_ptr+8(%eax)
                   1112:        movl    %edx,H_cache_ptr+4(%eax)
                   1113:        popl    %edi
                   1114:        popl    %esi
                   1115: 
                   1116: #if DO_STATS
                   1117:        SDINC(V_cache_hits3(%ebx))              /* update counter */
                   1118:        MP_ENABLE_PREEMPTION(%ebx)
                   1119:        popl    %ebx
                   1120: #endif
                   1121: 
                   1122:        LEAVE0
                   1123:        ret
                   1124: 
                   1125:        /* No function context, allocate a new context */
                   1126:        /* %ebx is the variables address if DO_STATS */
                   1127:        /* %ecx is the caller's caller's address */
                   1128:        /* %edx is the unique function pointer */
                   1129:        /* %ebx if DO_STATS pushed on stack */
                   1130: 
                   1131:        .align  ALIGN
                   1132: MARK(_gprof_mcount_new)
                   1133: LCL(gnew):
                   1134:        pushl   %esi
                   1135:        pushl   %edi
                   1136: 
                   1137: #if !DO_STATS
                   1138:        pushl   %ebx                            /* Address of vars needed for alloc */
                   1139:        MP_DISABLE_PREEMPTION(%ebx)
                   1140:        Vload                                   /* stats already loaded address */
                   1141: #endif
                   1142: 
                   1143:        SLOCK incl V_prof_records(%ebx)
                   1144:        movl    %edx,%esi                       /* save unique function ptr */
                   1145:        movl    %ecx,%edi                       /* and caller's caller address */
                   1146:        movl    $H_size,%eax                    /* memory block size */
                   1147:        movl    $C_gfunc,%ecx                   /* gprof function header memory pool */
                   1148:        call    EXT(_profile_alloc_asm)
                   1149: 
                   1150:        movl    V_hash_ptr(%ebx),%ecx           /* copy hash_ptr to func header */
                   1151:        movl    V_dummy_ptr(%ebx),%edx          /* dummy cache entry */
                   1152:        movl    %ecx,H_hash_ptr(%eax)
                   1153:        movl    %edx,H_cache_ptr+0(%eax)        /* store dummy cache ptrs */
                   1154:        movl    %edx,H_cache_ptr+4(%eax)
                   1155:        movl    %edx,H_cache_ptr+8(%eax)
                   1156:        movl    %esi,H_unique_ptr(%eax)         /* remember function unique ptr */
                   1157:        movl    Estack+12(%esp),%ecx            /* caller's address */
                   1158:        movl    $1,H_prof+P_count(%eax)         /* function called once so far */
                   1159:        movl    %ecx,H_prof+P_addr(%eax)        /* set up prof information */
                   1160:        movl    %eax,(%esi)                     /* update context block address */
                   1161:        movl    %edi,%ecx                       /* caller's caller address */
                   1162:        movl    %edx,%esi                       /* 2nd cached arc */
                   1163: 
                   1164: #if !DO_STATS
                   1165:        popl    %ebx
                   1166: #endif
                   1167: 
                   1168:        /* Fall through to add element to the hash table.  This may involve */
                   1169:        /* searching a few hash table elements that don't need to be searched */
                   1170:        /* since we have a new element, but it allows the hash table function */
                   1171:        /* to be specified in only one place */
                   1172: 
                   1173:        /* Didn't find entry in cache, search the global hash table */
                   1174:        /* %eax = gprof func header, %ebx = vars address if DO_STATS */
                   1175:        /* %ecx = caller's caller */
                   1176:        /* %edx, %esi = cached arcs that were searched */
                   1177:        /* %edi, %esi, %ebx if DO_STATS pushed on stack */
                   1178: 
                   1179:        .align  ALIGN
                   1180: MARK(_gprof_mcount_hash)
                   1181: LCL(gnocache):
                   1182: 
                   1183:        pushl   %esi                            /* save 2nd arc searched */
                   1184:        pushl   %edx                            /* save 1st arc searched */
                   1185:        movl    %eax,%esi                       /* save gprof func header */
                   1186: 
                   1187: #if DO_STATS
                   1188:        SDINC(V_hash_num(%ebx))
                   1189:        movl    Estack+20(%esp),%edi            /* caller's address */
                   1190: #else
                   1191:        movl    Estack+16(%esp),%edi            /* caller's address */
                   1192: #endif
                   1193:        movl    %ecx,%eax                       /* caller's caller address */
                   1194:        imull   %edi,%eax                       /* multiply to get hash */
                   1195:        movl    H_hash_ptr(%esi),%edx           /* hash pointer */
                   1196:        shrl    $GPROF_HASH_SHIFT,%eax          /* eliminate low order bits */
                   1197:        andl    $GPROF_HASH_MASK,%eax           /* mask to get hash value */
                   1198:        leal    0(%edx,%eax,4),%eax             /* pointer to hash bucket */
                   1199:        movl    %eax,%edx                       /* save hash bucket address */
                   1200: 
                   1201:        /* %eax = old arc, %ebx = vars address if DO_STATS, %ecx = caller's caller */
                   1202:        /* %edx = hash bucket address, %esi = gfunc ptr, %edi = caller's addr */
                   1203:        /* 2 old arcs, %edi, %esi, %ebx if DO_STATS pushed on stack */
                   1204: 
                   1205:        .align  ALIGN
                   1206: LCL(ghash):
                   1207:        movl    G_next(%eax),%eax               /* get next hash element */
                   1208:        cmpl    $0,%eax                         /* end of line? */
                   1209:        je      LCL(ghashnew)                   /* skip if allocate new hash */
                   1210: 
                   1211: #if DO_STATS
                   1212:        SDINC(V_hash_search(%ebx))
                   1213: #endif
                   1214: 
                   1215:        cmpl    G_selfpc(%eax),%edi             /* loop back if not one we want */
                   1216:        jne     LCL(ghash)
                   1217: 
                   1218:        cmpl    G_frompc(%eax),%ecx             /* loop back if not one we want */
                   1219:        jne     LCL(ghash)
                   1220: 
                   1221:        /* Found an entry, increment count, set up for caching, and return */
                   1222:        /* %eax = arc, %ebx = vars address if DO_STATS, %esi = func header */
                   1223:        /* 2 old arcs, %edi, %esi, %ebx if DO_STATS pushed on stack */
                   1224: 
                   1225:        DINC2(G_count(%eax),G_overflow(%eax))   /* bump arc count */
                   1226: 
                   1227:        popl    %ecx                            /* previous 1st arc searched */
                   1228:        movl    %eax,H_cache_ptr+0(%esi)        /* this element is now 1st arc */
                   1229:        popl    %edi                            /* previous 2nd arc searched */
                   1230:        movl    %ecx,H_cache_ptr+4(%esi)        /* new 2nd arc to be searched */
                   1231:        movl    %edi,H_cache_ptr+8(%esi)        /* new 3rd arc to be searched */
                   1232:        popl    %edi
                   1233:        popl    %esi
                   1234: 
                   1235: #if DO_STATS
                   1236:        MP_ENABLE_PREEMPTION(%ebx)
                   1237:        popl    %ebx
                   1238: #endif
                   1239: 
                   1240:        LEAVE0
                   1241:        ret                                     /* return to user */
                   1242: 
                   1243:        /* Allocate new arc */
                   1244:        /* %eax = old arc, %ebx = vars address if DO_STATS, %ecx = caller's caller */
                   1245:        /* %edx = hash bucket address, %esi = gfunc ptr, %edi = caller's addr */
                   1246:        /* 2 old arcs, %edi, %esi, %ebx if DO_STATS pushed on stack */
                   1247: 
                   1248:        .align  ALIGN
                   1249: MARK(_gprof_mcount_hashnew)
                   1250: LCL(ghashnew):
                   1251: 
                   1252: #if !DO_STATS
                   1253:        pushl   %ebx                            /* load address of vars if we haven't */
                   1254:        MP_DISABLE_PREEMPTION(%ebx)
                   1255:        Vload                                   /* already done so */
                   1256: #endif
                   1257: 
                   1258:        SLOCK incl V_gprof_records(%ebx)
                   1259:        pushl   %edx
                   1260:        movl    %ecx,%edi                       /* save caller's caller */
                   1261:        movl    $G_size,%eax                    /* arc size */
                   1262:        movl    $C_gprof,%ecx                   /* gprof memory pool */
                   1263:        call    EXT(_profile_alloc_asm)
                   1264:        popl    %edx
                   1265: 
                   1266:        movl    $1,G_count(%eax)                /* set call count */
                   1267:        movl    Estack+20(%esp),%ecx            /* caller's address */
                   1268:        movl    %edi,G_frompc(%eax)             /* caller's caller */
                   1269:        movl    %ecx,G_selfpc(%eax)
                   1270: 
                   1271: #if !DO_STATS
                   1272:        popl    %ebx                            /* release %ebx if no stats */
                   1273: #endif
                   1274: 
                   1275:        movl    (%edx),%ecx                     /* first hash bucket */
                   1276:        movl    %ecx,G_next(%eax)               /* update link */
                   1277:        movl    %eax,%ecx                       /* copy for xchgl */
                   1278:        xchgl   %ecx,(%edx)                     /* add to hash linked list */
                   1279:        movl    %ecx,G_next(%eax)               /* update in case list changed */
                   1280: 
                   1281:        popl    %ecx                            /* previous 1st arc searched */
                   1282:        popl    %edi                            /* previous 2nd arc searched */
                   1283:        movl    %eax,H_cache_ptr+0(%esi)        /* this element is now 1st arc */
                   1284:        movl    %ecx,H_cache_ptr+4(%esi)        /* new 2nd arc to be searched */
                   1285:        movl    %edi,H_cache_ptr+8(%esi)        /* new 3rd arc to be searched */
                   1286: 
                   1287:        popl    %edi
                   1288:        popl    %esi
                   1289: 
                   1290: #if DO_STATS
                   1291:        MP_ENABLE_PREEMPTION(%ebx)
                   1292:        popl    %ebx
                   1293: #endif
                   1294: 
                   1295:        LEAVE0
                   1296:        ret                                     /* return to user */
                   1297: 
                   1298: END(_gprof_mcount)
                   1299: 
                   1300: 
                   1301: /*
                   1302:  * This function assumes that neither the caller or it's caller
                   1303:  * has not omitted the frame pointer in order to get the caller's
                   1304:  * caller.  The stack looks like the following at the time of the call:
                   1305:  *
                   1306:  *             +-------------------------------+
                   1307:  *             |                               |
                   1308:  *             |                               |
                   1309:  *             | caller's caller stack,        |
                   1310:  *             | saved registers, params.      |
                   1311:  *             |                               |
                   1312:  *             |                               |
                   1313:  *             +-------------------------------+
                   1314:  *             | caller's caller return addr.  |
                   1315:  *             +-------------------------------+
                   1316:  *     fp -->  | previous frame pointer        |
                   1317:  *             +-------------------------------+
                   1318:  *             |                               |
                   1319:  *             | caller's stack, saved regs,   |
                   1320:  *             | params.                       |
                   1321:  *             |                               |
                   1322:  *             +-------------------------------+
                   1323:  *     sp -->  | caller's return address       |
                   1324:  *             +-------------------------------+
                   1325:  *
                   1326:  * Recent versions of the compiler put the address of the pointer
                   1327:  * sized word in %edx.  Previous versions did not, but this code
                   1328:  * does not support them.
                   1329:  */
                   1330: 
                   1331: /*
                   1332:  * Note that OSF/rose blew defining _mcount, since it prepends leading
                   1333:  * underscores, and _mcount didn't have a second leading underscore.  However,
                   1334:  * some of the kernel/server functions 'know' that mcount has a leading
                   1335:  * underscore, so we satisfy both camps.
                   1336:  */
                   1337: 
                   1338: #if OLD_MCOUNT
                   1339:        .globl  mcount
                   1340:        .globl  _mcount
                   1341:        ELF_FUNC(mcount)
                   1342:        ELF_FUNC(_mcount)
                   1343:        .align  FALIGN
                   1344: _mcount:
                   1345: mcount:
                   1346: 
                   1347:        pushl   %ebx
                   1348:        MP_DISABLE_PREEMPTION(%ebx)
                   1349:        Vload
                   1350: 
                   1351: #if DO_STATS
                   1352:        SDINC(V_old_mcount(%ebx))
                   1353: #endif
                   1354: 
                   1355:        /* In calling the functions, we will actually leave 1 extra word on the */
                   1356:        /* top of the stack, but generated code will not notice, since the function */
                   1357:        /* uses a frame pointer */
                   1358: 
                   1359:        movl    V_mcount_ptr_ptr(%ebx),%ecx     /* address of mcount_ptr */
                   1360:        MP_ENABLE_PREEMPTION(%ebx)
                   1361:        popl    %ebx
                   1362:        movl    4(%ebp),%eax                    /* caller's caller return address */
                   1363:        xchgl   %eax,(%esp)                     /* push & get return address */
                   1364:        pushl   %eax                            /* push return address */
                   1365:        jmp     *(%ecx)                         /* go to profile the function */
                   1366: 
                   1367: End(mcount)
                   1368: End(_mcount)
                   1369: #endif
                   1370: 
                   1371: 
                   1372: #if !defined(KERNEL) && !defined(MACH_KERNEL)
                   1373: 
                   1374: /*
                   1375:  * Convert a 64-bit integer to a string.
                   1376:  * Arg #1 is a pointer to a string (at least 24 bytes) or NULL
                   1377:  * Arg #2 is the low part of the 64-bit integer.
                   1378:  * Arg #3 is the high part of the 64-bit integer.
                   1379:  */
                   1380: 
                   1381: Entry(_profile_cnt_to_decimal)
                   1382:        ENTER
                   1383:        pushl   %ebx
                   1384:        pushl   %esi
                   1385:        pushl   %edi
                   1386:        movl    Estack+16(%esp),%ebx            /* pointer or null */
                   1387:        movl    Estack+20(%esp),%edi            /* low part of number */
                   1388:        movl    $10,%ecx                        /* divisor */
                   1389:        cmpl    $0,%ebx                         /* skip if pointer ok */
                   1390:        jne     LCL(cvt_nonnull)
                   1391: 
                   1392:        MP_DISABLE_PREEMPTION(%ebx)
                   1393:        Vload                                   /* get _profile_vars address */
                   1394:        leal    V_num_buffer(%ebx),%ebx         /* temp buffer to use */
                   1395: 
                   1396:        .align  ALIGN
                   1397: LCL(cvt_nonnull):
                   1398:        addl    $(N_digit-1),%ebx               /* point string at end */
                   1399:        movb    $0,0(%ebx)                      /* null terminate string */
                   1400: 
                   1401: #if OVERFLOW
                   1402:        movl    Estack+24(%esp),%esi            /* high part of number */
                   1403:        cmpl    $0,%esi                         /* any thing left in high part? */
                   1404:        je      LCL(cvt_low)
                   1405: 
                   1406:        .align  ALIGN
                   1407: LCL(cvt_high):
                   1408:        movl    %esi,%eax                       /* calculate high/10 & high%10 */
                   1409:        xorl    %edx,%edx
                   1410:        divl    %ecx
                   1411:        movl    %eax,%esi
                   1412: 
                   1413:        movl    %edi,%eax                       /* calculate (low + (high%10)*2^32) / 10 */
                   1414:        divl    %ecx
                   1415:        movl    %eax,%edi
                   1416: 
                   1417:        decl    %ebx                            /* decrement string pointer */
                   1418:        addl    $48,%edx                        /* convert from 0..9 -> '0'..'9' */
                   1419:        movb    %dl,0(%ebx)                     /* store digit in string */
                   1420:        cmpl    $0,%esi                         /* any thing left in high part? */
                   1421:        jne     LCL(cvt_high)
                   1422: 
                   1423: #endif /* OVERFLOW */
                   1424: 
                   1425:        .align  ALIGN
                   1426: LCL(cvt_low):
                   1427:        movl    %edi,%eax                       /* get low part into %eax */
                   1428: 
                   1429:        .align  ALIGN
                   1430: LCL(cvt_low2):
                   1431:        xorl    %edx,%edx                       /* 0 */
                   1432:        divl    %ecx                            /* calculate next digit */
                   1433:        decl    %ebx                            /* decrement string pointer */
                   1434:        addl    $48,%edx                        /* convert from 0..9 -> '0'..'9' */
                   1435:        movb    %dl,0(%ebx)                     /* store digit in string */
                   1436:        cmpl    $0,%eax                         /* any more digits to convert? */
                   1437:        jne     LCL(cvt_low2)
                   1438: 
                   1439:        movl    %ebx,%eax                       /* return value */
                   1440:        popl    %edi
                   1441:        popl    %esi
                   1442:        MP_ENABLE_PREEMPTION(%ebx)
                   1443:        popl    %ebx
                   1444:        LEAVE0
                   1445:        ret
                   1446: 
                   1447: END(_profile_cnt_to_decimal)
                   1448: 
                   1449: #endif

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.