|
|
1.1 root 1: /*
2: * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3: *
4: * @APPLE_LICENSE_HEADER_START@
5: *
6: * The contents of this file constitute Original Code as defined in and
7: * are subject to the Apple Public Source License Version 1.1 (the
8: * "License"). You may not use this file except in compliance with the
9: * License. Please obtain a copy of the License at
10: * http://www.apple.com/publicsource and read it before using this file.
11: *
12: * This Original Code and all software distributed under the License are
13: * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14: * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15: * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16: * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17: * License for the specific language governing rights and limitations
18: * under the License.
19: *
20: * @APPLE_LICENSE_HEADER_END@
21: */
22: /*
23: * @OSF_COPYRIGHT@
24: */
25: /*
26: * HISTORY
27: *
28: * Revision 1.1.1.1 1998/09/22 21:05:49 wsanchez
29: * Import of Mac OS X kernel (~semeria)
30: *
31: * Revision 1.1.1.1 1998/03/07 02:26:08 wsanchez
32: * Import of OSF Mach kernel (~mburg)
33: *
34: * Revision 1.1.7.1 1997/09/22 17:41:24 barbou
35: * MP+RT: protect cpu_number() usage against preemption.
36: * [97/09/16 barbou]
37: *
38: * Revision 1.1.5.1 1995/01/06 19:53:37 devrcs
39: * mk6 CR668 - 1.3b26 merge
40: * new file for mk6
41: * [1994/10/12 22:25:20 dwm]
42: *
43: * Revision 1.1.2.2 1994/05/16 19:19:17 meissner
44: * Add support for converting 64-bit integers to a decimal string.
45: * Use the correct address (selfpc) when creating the prof header for gprof.
46: * [1994/04/28 21:44:59 meissner]
47: *
48: * Revision 1.1.2.1 1994/04/08 17:51:42 meissner
49: * Make most stats 64 bits, except for things like memory allocation.
50: * [1994/04/02 14:58:21 meissner]
51: *
52: * Do not provide old mcount support under MK or server.
53: * Fixup stats size so it is the same as in profile-md.h.
54: * [1994/03/29 21:00:03 meissner]
55: *
56: * Use faster sequence for overflow addition.
57: * Keep {dummy,prof,gprof,old}_mcount counts in double precision.
58: * Add kernel NCPUS > 1 support.
59: * [1994/03/17 20:13:23 meissner]
60: *
61: * Add gprof/prof overflow support
62: * [1994/03/17 14:56:44 meissner]
63: *
64: * Add size of histogram counters & unused fields to profile_profil struct
65: * [1994/02/17 21:41:44 meissner]
66: *
67: * Add too_low/too_high to profile_stats.
68: * [1994/02/16 22:38:11 meissner]
69: *
70: * Bump # allocation contexts to 32 from 16.
71: * Store unique ptr address in gprof function header structure for _profile_reset.
72: * Add new fields from profile-{internal,md}.h.
73: * Align loop looking for an unlocked acontext.
74: * Count # times a locked context block was found.
75: * Expand copyright.
76: * [1994/02/07 12:40:56 meissner]
77: *
78: * Keep track of the number of times the kernel overflows the HISTCOUNTER counter.
79: * [1994/02/03 20:13:23 meissner]
80: *
81: * Add stats for {user,kernel,idle} mode in the kernel.
82: * [1994/02/03 15:17:22 meissner]
83: *
84: * No change.
85: * [1994/02/03 00:58:49 meissner]
86: *
87: * Combine _profile_{vars,stats,md}; Allow more than one _profile_vars.
88: * [1994/02/01 12:03:56 meissner]
89: *
90: * Move _mcount_ptr to be closer to other data declarations.
91: * Add text_len to profile_profil structure for mk.
92: * Split records_cnt into prof_cnt/gprof_cnt.
93: * Always update prof_cnt/gprof_cnt even if not DO_STATS.
94: * Add current/max cpu indicator to stats for kernel.
95: * [1994/01/28 23:33:20 meissner]
96: *
97: * Don't do 4+Lgotoff(lab), use separate labels.
98: * Change GPROF_HASH_SHIFT to 9 (from 8).
99: * [1994/01/26 22:00:59 meissner]
100: *
101: * Fixup NO_RECURSIVE_ALLOC to do byte loads, not word loads.
102: * [1994/01/26 20:30:57 meissner]
103: *
104: * Move callback pointers into separate allocation context.
105: * Add size fields for other structures to profile-vars.
106: * Allocate string table as one large allocation.
107: * Rewrite old mcount code once again.
108: * Use multiply to make hash value, not divide.
109: * Hash table is now a power of two.
110: * [1994/01/26 20:23:32 meissner]
111: *
112: * Cut hash table size back to 16189.
113: * Add size fields to all structures.
114: * Add major/minor version number to _profile_md.
115: * Move allocation context block pointers to _profile_vars.
116: * Move _gprof_dummy after _profile_md.
117: * New function header code now falls into hash an element
118: * to avoid having the hash code duplicated or use a macro.
119: * Fix bug in _gprof_mcount with ELF shared libraries.
120: * [1994/01/25 01:45:59 meissner]
121: *
122: * Move init functions to C code; rearrange profil varaibles.
123: * [1994/01/22 01:11:14 meissner]
124: *
125: * No change.
126: * [1994/01/20 20:56:43 meissner]
127: *
128: * Fixup copyright.
129: * [1994/01/18 23:07:39 meissner]
130: *
131: * Make flags byte-sized.
132: * Add have_bb flag.
133: * Add init_format flag.
134: * Always put word size multipler first in .space.
135: * [1994/01/18 21:57:14 meissner]
136: *
137: * Fix elfpic problems in last change.
138: * [1994/01/16 14:04:26 meissner]
139: *
140: * Rewrite gprof caching to be faster & not need a lock.
141: * Record prof information for gprof too.
142: * Bump reserved stats to 64.
143: * Bump up hash table size 30799.
144: * Conditionally use lock prefix.
145: * Change most #ifdef's to #if.
146: * DEBUG_PROFILE turns on stack frames now.
147: * Conditionally add externs to gprof to determine where time is spent.
148: * Prof_mcount uses xchgl to update function pointer.
149: * [1994/01/15 18:40:33 meissner]
150: *
151: * Fix a comment.
152: * Separate statistics from debugging (though debugging turns it on).
153: * Remove debug code that traces each gprof request.
154: * [1994/01/15 00:59:02 meissner]
155: *
156: * Move max hash bucket calculation into _gprof_write & put info in stats structure.
157: * [1994/01/04 16:15:14 meissner]
158: *
159: * Use _profile_printf to write diagnostics; add diag_stream to hold stream to write to.
160: * [1994/01/04 15:37:44 meissner]
161: *
162: * Add more allocation memory pools (gprof function hdrs in particular).
163: * For prof, gprof arc, and gprof function hdrs, allocate 16 pages at a time.
164: * Add major/minor version numbers to _profile_{vars,stats}.
165: * Add # profil buckets field to _profil_stats.
166: * [19
167: *
168: * $EndLog$
169: */
170:
171: /*
172: * Common 386 profiling module that is shared between the kernel, mach
173: * servers, and the user space library. Each environment includes
174: * this file.
175: */
176:
177: .file "profile-asm.s"
178:
179: #include <cpus.h>
180:
181: #include <machine/asm.h>
182:
183: /*
184: * By default, debugging turns on statistics and stack frames.
185: */
186:
187: #if DEBUG_PROFILE /* debugging forces statistics and stack frames on unless already chosen */
188: #ifndef DO_STATS
189: #define DO_STATS 1
190: #endif
191:
192: #ifndef STACK_FRAMES
193: #define STACK_FRAMES 1
194: #endif
195: #endif /* DEBUG_PROFILE */
196:
197: #ifndef OLD_MCOUNT
198: #define OLD_MCOUNT 0 /* 0: do not compile old code for mcount */
199: #endif
200:
201: #ifndef DO_STATS
202: #define DO_STATS 1 /* 1: compile in statistics code (the default even without DEBUG_PROFILE) */
203: #endif
204:
205: #ifndef DO_LOCK
206: #define DO_LOCK 0 /* 1: put a lock prefix in front of counter updates (see LOCK) */
207: #endif
208:
209: #ifndef LOCK_STATS
210: #define LOCK_STATS DO_LOCK /* 1: statistics updates get the lock prefix too (see SLOCK); follows DO_LOCK by default */
211: #endif
212:
213: #ifndef STACK_FRAMES
214: #define STACK_FRAMES 0 /* 1: ENTER/LEAVE0 create stack frames for debugger */
215: #endif
216:
217: #ifndef NO_RECURSIVE_ALLOC
218: #define NO_RECURSIVE_ALLOC 0 /* 1: check for recursive allocs */
219: /* (not thread safe!) */
220: #endif
221:
222: #ifndef MARK_GPROF
223: #define MARK_GPROF 0 /* 1: MARK(name) adds externs for gprof profiling */
224: #endif
225:
226: #ifndef OVERFLOW
227: #define OVERFLOW 1 /* 1: counters are updated as 64-bit add/adc pairs; 0: plain 32-bit updates */
228: #endif
229:
230: /*
231: * Turn on the use of the lock prefix if desired.
232: */
233:
234: #ifndef LOCK /* LOCK: prefix for profiling counter updates */
235: #if DO_LOCK
236: #define LOCK lock; /* x86 lock prefix for the instruction that follows */
237: #else
238: #define LOCK /* expands to nothing: updates are not locked */
239: #endif
240: #endif
241:
242: #ifndef SLOCK /* SLOCK: prefix for statistics counter updates */
243: #if LOCK_STATS
244: #define SLOCK LOCK /* same as LOCK, so it is still empty when DO_LOCK is 0 */
245: #else
246: #define SLOCK /* expands to nothing: statistics are updated without a lock prefix */
247: #endif
248: #endif
249:
250: /*
251: * Double or single precision incrementing
252: */
253:
254: #if OVERFLOW /* 64-bit counters: low word at mem, high word at 4+mem (or mem2) */
255: #define DINC(mem) LOCK addl $1,mem; LOCK adcl $0,4+mem /* 64-bit increment; addl (not incl) so the carry reaches adcl */
256: #define DINC2(mem,mem2) LOCK addl $1,mem; LOCK adcl $0,mem2 /* as DINC, high word given explicitly */
257: #define SDINC(mem) SLOCK addl $1,mem; SLOCK adcl $0,4+mem /* statistics counter increment */
258: #define SDADD(val,mem) SLOCK addl val,mem; SLOCK adcl $0,4+mem /* statistics counter += val */
259: #define SDADDNEG(val,mem) SLOCK subl val,mem; SLOCK adcl $0,4+mem /* low word -= val, then high word += CF */
260: #define SDSUB(val,mem) SLOCK subl val,mem; SLOCK sbbl $0,4+mem /* statistics counter -= val, borrow propagated */
261: /* NOTE(review): in SDADDNEG, subl sets CF on borrow; for val == -1 that is every case except low word == 0xffffffff, so adcl bumps the high word on nearly every call -- confirm whether this is intended. */
262: #else /* 32-bit counters: no high word is touched */
263: #define DINC(mem) LOCK incl mem
264: #define DINC2(mem,mem2) LOCK incl mem /* mem2 is ignored */
265: #define SDINC(mem) SLOCK incl mem
266: #define SDADD(val,mem) SLOCK addl val,mem
267: #define SDADDNEG(val,mem) SLOCK subl val,mem
268: #define SDSUB(val,mem) SLOCK subl val,mem
269: #endif
270:
271: /*
272: * Stack frame support so that debugger traceback works.
273: */
274:
275: #if STACK_FRAMES
276: #define ENTER pushl %ebp; movl %esp,%ebp /* build a frame at function entry */
277: #define LEAVE0 popl %ebp /* undo ENTER; %esp must already be back at the saved %ebp */
278: #define Estack 4 /* bytes pushed by ENTER; added to %esp-relative argument offsets */
279: #else
280: #define ENTER /* no frame is built */
281: #define LEAVE0
282: #define Estack 0 /* ENTER pushed nothing */
283: #endif
284:
285: /*
286: * Gprof profiling.
287: */
288:
289: #if MARK_GPROF /* MARK(name): drop a global, zero-size function label at this point */
290: #define MARK(name) .globl EXT(name); ELF_FUNC(EXT(name)); ELF_SIZE(EXT(name),0); LEXT(name)
291: #else
292: #define MARK(name) /* expands to nothing */
293: #endif
294:
295: /*
296: * Profiling allocation context block. Each time memory is needed, the
297: * allocator loops until it finds an unlocked context block, and allocates
298: * from that block. If no context blocks are available, a new memory
299: * pool is allocated, and added to the end of the chain.
300: */
301:
302: LCL(A_next) = 0 /* offset 0: next context block link; must be 0 because the allocator reads the head slot in V_acontext through A_next */
303: LCL(A_plist) = LCL(A_next)+4 /* offset 4: head of page list for context block */
304: LCL(A_lock) = LCL(A_plist)+4 /* offset 8: lock word; 0 = free, nonzero = held (taken with xchgl) */
305: LCL(A_size) = LCL(A_lock)+4 /* 12: size of context block */
306: /* Short names for the offsets above. */
307: #define A_next LCL(A_next)
308: #define A_plist LCL(A_plist)
309: #define A_lock LCL(A_lock)
310: #define A_size LCL(A_size)
311:
312: /*
313: * Allocation contexts used.
314: */
315:
316: LCL(C_prof) = 0 /* prof records */
317: LCL(C_gprof) = 1 /* gprof arc records */
318: LCL(C_gfunc) = 2 /* gprof function headers */
319: LCL(C_misc) = 3 /* misc. allocations */
320: LCL(C_profil) = 4 /* memory for profil */
321: LCL(C_dci) = 5 /* memory for dci */
322: LCL(C_bb) = 6 /* memory for basic blocks */
323: LCL(C_callback) = 7 /* memory for callbacks */
324: LCL(C_max) = 32 /* # allocation contexts; also the length of each per-context array (acontext, num_alloc, ...) */
325: /* Short names; only these four contexts get one. */
326: #define C_prof LCL(C_prof)
327: #define C_gprof LCL(C_gprof)
328: #define C_gfunc LCL(C_gfunc)
329: #define C_max LCL(C_max)
330:
331: /*
332: * Linked list of memory allocations.
333: */
334:
335: LCL(M_first) = 0 /* offset 0: pointer to first byte available */
336: LCL(M_ptr) = LCL(M_first)+4 /* offset 4: pointer to next available byte */
337: LCL(M_next) = LCL(M_ptr)+4 /* offset 8: next page allocated */
338: LCL(M_nfree) = LCL(M_next)+4 /* offset 12: # bytes available */
339: LCL(M_nalloc) = LCL(M_nfree)+4 /* offset 16: # bytes allocated */
340: LCL(M_num) = LCL(M_nalloc)+4 /* offset 20: # allocations done on this page */
341: LCL(M_size) = LCL(M_num)+4 /* 24: size of page header */
342: /* Short names for the offsets above. */
343: #define M_first LCL(M_first)
344: #define M_ptr LCL(M_ptr)
345: #define M_next LCL(M_next)
346: #define M_nfree LCL(M_nfree)
347: #define M_nalloc LCL(M_nalloc)
348: #define M_num LCL(M_num)
349: #define M_size LCL(M_size)
350:
351: /*
352: * Prof data type.
353: */
354:
355: LCL(P_addr) = 0 /* offset 0: function address */
356: LCL(P_count) = LCL(P_addr)+4 /* offset 4: # times function called */
357: LCL(P_overflow) = LCL(P_count)+4 /* offset 8: # times count overflowed */
358: LCL(P_size) = LCL(P_overflow)+4 /* 12: size of prof data type */
359: /* Short names for the offsets above. */
360: #define P_addr LCL(P_addr)
361: #define P_count LCL(P_count)
362: #define P_overflow LCL(P_overflow)
363: #define P_size LCL(P_size)
364:
365: /*
366: * Gprof data type.
367: */
368:
369: LCL(G_next) = 0 /* offset 0: next hash link (must be 0) */
370: LCL(G_frompc) = LCL(G_next)+4 /* offset 4: caller's caller */
371: LCL(G_selfpc) = LCL(G_frompc)+4 /* offset 8: caller's address */
372: LCL(G_count) = LCL(G_selfpc)+4 /* offset 12: # times arc traversed */
373: LCL(G_overflow) = LCL(G_count)+4 /* offset 16: # times count overflowed */
374: LCL(G_size) = LCL(G_overflow)+4 /* 20: size of gprof data type */
375: /* Short names for the offsets above. */
376: #define G_next LCL(G_next)
377: #define G_frompc LCL(G_frompc)
378: #define G_selfpc LCL(G_selfpc)
379: #define G_count LCL(G_count)
380: #define G_overflow LCL(G_overflow)
381: #define G_size LCL(G_size)
382:
383: /*
384: * Gprof header.
385: *
386: * At least one header is allocated for each unique function that is profiled.
387: * In order to save time calculating the hash value, the last H_maxcache
388: * distinct arcs are cached within this structure. Also, to avoid loading
389: * the GOT when searching the hash table, we copy the hash pointer to this
390: * structure, so that we only load the GOT when we need to allocate an arc.
391: */
392:
393: LCL(H_maxcache) = 3 /* # of cache table entries */
394: LCL(H_csize) = 4*LCL(H_maxcache) /* 12: size of each cache array (one 4-byte pointer per entry) */
395:
396: LCL(H_hash_ptr) = 0 /* offset 0: hash table to use */
397: LCL(H_unique_ptr) = LCL(H_hash_ptr)+4 /* offset 4: function unique pointer */
398: LCL(H_prof) = LCL(H_unique_ptr)+4 /* offset 8: prof statistics (an embedded prof record, P_size bytes) */
399: LCL(H_cache_ptr) = LCL(H_prof)+P_size /* offset 20: cache table of element pointers */
400: LCL(H_size) = LCL(H_cache_ptr)+LCL(H_csize) /* 32: size of gprof header type */
401: /* Short names for the values above. */
402: #define H_maxcache LCL(H_maxcache)
403: #define H_csize LCL(H_csize)
404: #define H_hash_ptr LCL(H_hash_ptr)
405: #define H_unique_ptr LCL(H_unique_ptr)
406: #define H_prof LCL(H_prof)
407: #define H_cache_ptr LCL(H_cache_ptr)
408: #define H_size LCL(H_size)
409:
410: /*
411: * Number of bytes needed to write a 64 bit number in decimal, including the
412: * trailing null: 20 digits + 1, rounded up to be divisible by 4.
413: */
414:
415: #define N_digit 24
416:
417:
418: .data
419:
420: /*
421: * Default gprof hash table size, which must be a power of two.
422: * The shift specifies how many low order bits to eliminate when
423: * calculating the hash value.
424: */
425:
426: #ifndef GPROF_HASH_SIZE
427: #define GPROF_HASH_SIZE 16384 /* 2^14 entries; an override must also be a power of two */
428: #endif
429:
430: #ifndef GPROF_HASH_SHIFT
431: #define GPROF_HASH_SHIFT 9 /* # of low order bits dropped when calculating the hash value */
432: #endif
433:
434: #define GPROF_HASH_MASK (GPROF_HASH_SIZE-1) /* index mask; only valid while the size is a power of two */
435:
436: DATA(_profile_hash_size)
437: .long GPROF_HASH_SIZE /* the default table size, exported as a data word */
438: ENDDATA(_profile_hash_size)
439:
440:
441:
442: /*
443: * Pointer that the compiler uses to call to the appropriate mcount function.
444: */
445:
446: DATA(_mcount_ptr)
447: .long EXT(_dummy_mcount) /* initial target is the dummy routine, which only counts the call (when DO_STATS) and returns */
448: ENDDATA(_mcount_ptr)
449:
450: /*
451: * Global profile variables. The structure that accesses this in C is declared
452: * in profile-internal.h. All items in .data that follow this will be used as
453: * one giant record, and each unique machine, thread, kgmon output or what have
454: * you will create a separate instance. Typically there is only one instance
455: * which will be the memory laid out below.
456: */
457:
458: LCL(var_major_version) = 0 /* major version number */
459: LCL(var_minor_version) = LCL(var_major_version)+4 /* minor version number */
460: LCL(vars_size) = LCL(var_minor_version)+4 /* size of _profile_vars structure */
461: LCL(plist_size) = LCL(vars_size)+4 /* size of page_list structure */
462: LCL(acontext_size) = LCL(plist_size)+4 /* size of allocation contexts */
463: LCL(callback_size) = LCL(acontext_size)+4 /* size of callback structure */
464: LCL(type) = LCL(callback_size)+4 /* profile type (gprof, prof) */
465: LCL(error_msg) = LCL(type)+4 /* error message for perror */
466: LCL(filename) = LCL(error_msg)+4 /* filename to write to */
467: LCL(str_ptr) = LCL(filename)+4 /* string table pointer */
468: LCL(stream) = LCL(str_ptr)+4 /* stdio stream to write to */
469: LCL(diag_stream) = LCL(stream)+4 /* stdio stream to write diagnostics to */
470: LCL(fwrite_func) = LCL(diag_stream)+4 /* function like fwrite to output bytes */
471: LCL(page_size) = LCL(fwrite_func)+4 /* page size in bytes */
472: LCL(str_bytes) = LCL(page_size)+4 /* # bytes in string table */
473: LCL(str_total) = LCL(str_bytes)+4 /* # total bytes allocated for string table */
474: LCL(clock_ticks) = LCL(str_total)+4 /* # clock ticks per second */
475:
476: /* profil variables */
477: LCL(profil_start) = LCL(clock_ticks)+4 /* start of profil variables */
478: LCL(lowpc) = LCL(clock_ticks)+4 /* lowest address (same offset as profil_start: first profil field) */
479: LCL(highpc) = LCL(lowpc)+4 /* highest address */
480: LCL(text_len) = LCL(highpc)+4 /* highpc-lowpc */
481: LCL(profil_len) = LCL(text_len)+4 /* size of profil buffer */
482: LCL(counter_size) = LCL(profil_len)+4 /* size of individual counter */
483: LCL(scale) = LCL(counter_size)+4 /* scale factor */
484: LCL(profil_unused) = LCL(scale)+4 /* unused fields (8 words) */
485: LCL(profil_end) = LCL(profil_unused)+4*8 /* end of profil_info structure */
486: LCL(profil_buf) = LCL(profil_end) /* buffer for profil (first word after the profil_info structure) */
487:
488: /* Output selection func ptrs */
489: LCL(output_init) = LCL(profil_buf)+4 /* Initialization */
490: LCL(output) = LCL(output_init)+4 /* Write out profiling info */
491: LCL(output_ptr) = LCL(output)+4 /* Output specific data ptr */
492:
493: /* Memory allocation support */
494: LCL(acontext) = LCL(output_ptr)+4 /* pointers to allocation context blocks (C_max words) */
495:
496: LCL(bogus_func) = LCL(acontext)+4*C_max /* function to use if gprof arc is bad */
497: LCL(vars_unused) = LCL(bogus_func)+4 /* future growth (63 words) */
498:
499: /* flags (one byte each) */
500: LCL(init) = LCL(vars_unused)+4*63 /* whether initializations were done */
501: LCL(active) = LCL(init)+1 /* whether profiling is active */
502: LCL(do_profile) = LCL(active)+1 /* whether to do profiling */
503: LCL(use_dci) = LCL(do_profile)+1 /* whether to use DCI */
504: LCL(use_profil) = LCL(use_dci)+1 /* whether to use profil */
505: LCL(recursive_alloc) = LCL(use_profil)+1 /* alloc called recursively */
506: LCL(output_uarea) = LCL(recursive_alloc)+1 /* output uarea */
507: LCL(output_stats) = LCL(output_uarea)+1 /* output stats info */
508: LCL(output_clock) = LCL(output_stats)+1 /* output the clock ticks */
509: LCL(multiple_sections) = LCL(output_clock)+1 /* multiple sections are ok */
510: LCL(have_bb) = LCL(multiple_sections)+1 /* whether we have basic block data */
511: LCL(init_format) = LCL(have_bb)+1 /* The output format has been chosen */
512: LCL(debug) = LCL(init_format)+1 /* Whether or not we are debugging */
513: LCL(check_funcs) = LCL(debug)+1 /* Whether to check functions for validity */
514: LCL(flag_unused) = LCL(check_funcs)+1 /* unused flags (62 bytes) */
515: LCL(end_of_vars) = LCL(flag_unused)+62 /* size of machine independent vars */
516:
517: /*
518: * Data that contains profile statistics that can be dumped out
519: * into the {,g}mon.out file. This is defined in profile-md.h.
520: */
521:
522: LCL(stats_start) = LCL(end_of_vars) /* start of stats substructure */
523: LCL(stats_major_version)= LCL(stats_start) /* major version number */
524: LCL(stats_minor_version)= LCL(stats_major_version)+4 /* minor version number */
525: LCL(stats_size) = LCL(stats_minor_version)+4 /* size of _profile_stats structure */
526: LCL(profil_buckets) = LCL(stats_size)+4 /* # profil buckets */
527: LCL(my_cpu) = LCL(profil_buckets)+4 /* identify which cpu/thread this is */
528: LCL(max_cpu) = LCL(my_cpu)+4 /* maximum cpu/thread indicator (the HISTORY above says "current/max cpu indicator"; TODO confirm against profile-md.h) */
529: LCL(prof_records) = LCL(max_cpu)+4 /* # of profiled functions */
530: LCL(gprof_records) = LCL(prof_records)+4 /* # of gprof arcs created */
531: LCL(hash_buckets) = LCL(gprof_records)+4 /* max gprof hash buckets on a chain */
532: LCL(bogus_count) = LCL(hash_buckets)+4 /* # bogus functions found in gprof */
533: /* The counters from here through gprof_overflow are 8 bytes (64 bits) each. */
534: LCL(cnt) = LCL(bogus_count)+4 /* # of _{prof,gprof}_mcount calls */
535: LCL(dummy) = LCL(cnt)+8 /* # of _dummy_mcount calls */
536: LCL(old_mcount) = LCL(dummy)+8 /* # of old mcount calls */
537: LCL(hash_search) = LCL(old_mcount)+8 /* # gprof hash buckets searched */
538: LCL(hash_num) = LCL(hash_search)+8 /* # times hash table searched */
539: LCL(user_ticks) = LCL(hash_num)+8 /* # ticks within user space */
540: LCL(kernel_ticks) = LCL(user_ticks)+8 /* # ticks within kernel space */
541: LCL(idle_ticks) = LCL(kernel_ticks)+8 /* # ticks cpu was idle */
542: LCL(overflow_ticks) = LCL(idle_ticks)+8 /* # ticks where histcounter overflowed */
543: LCL(acontext_locked) = LCL(overflow_ticks)+8 /* # times an acontext was locked */
544: LCL(too_low) = LCL(acontext_locked)+8 /* # times histogram tick too low */
545: LCL(too_high) = LCL(too_low)+8 /* # times histogram tick too high */
546: LCL(prof_overflow) = LCL(too_high)+8 /* # times the prof count field overflowed */
547: LCL(gprof_overflow) = LCL(prof_overflow)+8 /* # times the gprof count field overflowed */
548: LCL(num_alloc) = LCL(gprof_overflow)+8 /* # allocations in each context (C_max 4-byte entries) */
549: LCL(bytes_alloc) = LCL(num_alloc)+4*C_max /* bytes allocated in each context */
550: LCL(num_context) = LCL(bytes_alloc)+4*C_max /* # allocation context blocks */
551: LCL(wasted) = LCL(num_context)+4*C_max /* # bytes wasted */
552: LCL(overhead) = LCL(wasted)+4*C_max /* # bytes of overhead */
553: LCL(buckets) = LCL(overhead)+4*C_max /* # hash indexes that have n buckets (10 4-byte entries) */
554: LCL(cache_hits1) = LCL(buckets)+4*10 /* # gprof cache hits in bucket #1 */
555: LCL(cache_hits2) = LCL(cache_hits1)+8 /* # gprof cache hits in bucket #2 */
556: LCL(cache_hits3) = LCL(cache_hits2)+8 /* # gprof cache hits in bucket #3 */
557: LCL(stats_unused) = LCL(cache_hits3)+8 /* reserved for future use (64 8-byte slots) */
558: LCL(stats_end) = LCL(stats_unused)+8*64 /* end of stats structure */
559:
560: /*
561: * Machine dependent variables that no C file should access (except for
562: * profile-md.c).
563: */
564:
565: LCL(md_start) = LCL(stats_end) /* start of md structure */
566: LCL(md_major_version) = LCL(md_start) /* major version number */
567: LCL(md_minor_version) = LCL(md_major_version)+4 /* minor version number */
568: LCL(md_size) = LCL(md_minor_version)+4 /* size of _profile_md structure */
569: LCL(hash_ptr) = LCL(md_size)+4 /* gprof hash pointer */
570: LCL(hash_size) = LCL(hash_ptr)+4 /* gprof hash size */
571: LCL(num_cache) = LCL(hash_size)+4 /* # of cache entries */
572: LCL(save_mcount_ptr) = LCL(num_cache)+4 /* save for mcount_ptr when suspending profiling */
573: LCL(mcount_ptr_ptr) = LCL(save_mcount_ptr)+4 /* pointer to _mcount_ptr */
574: LCL(dummy_ptr) = LCL(mcount_ptr_ptr)+4 /* pointer to gprof_dummy */
575: LCL(alloc_pages) = LCL(dummy_ptr)+4 /* allocate more memory (function pointer, called indirectly by the allocator) */
576: LCL(num_buffer) = LCL(alloc_pages)+4 /* buffer to convert 64 bit ints in (N_digit bytes) */
577: LCL(md_unused) = LCL(num_buffer)+N_digit /* unused fields (58 words) */
578: LCL(md_end) = LCL(md_unused)+4*58 /* end of md structure */
579: LCL(total_size) = LCL(md_end) /* size of entire structure */
580:
581: /*
582: * Size of the entire _profile_vars structure.
583: */
584:
585: DATA(_profile_size)
586: .long LCL(total_size) /* == LCL(md_end): vars, stats and md substructures together */
587: ENDDATA(_profile_size)
588:
589: /*
590: * Size of the statistics substructure.
591: */
592:
593: DATA(_profile_stats_size)
594: .long LCL(stats_end)-LCL(stats_start) /* byte length of the stats substructure */
595: ENDDATA(_profile_stats_size)
596:
597: /*
598: * Size of the profil info substructure.
599: */
600:
601: DATA(_profile_profil_size)
602: .long LCL(profil_end)-LCL(profil_start) /* byte length of the profil info substructure (profil_buf not included) */
603: ENDDATA(_profile_profil_size)
604:
605: /*
606: * Size of the machine dependent substructure (LCL(md_end) - LCL(md_start)).
607: */
608:
609: DATA(_profile_md_size)
610: .long LCL(md_end)-LCL(md_start)
611: ENDDATA(_profile_md_size)
612:
613: /*
614: * Whether statistics are supported.
615: */
616:
617: DATA(_profile_do_stats)
618: .long DO_STATS /* the build-time DO_STATS setting, exported as a data word */
619: ENDDATA(_profile_do_stats)
620:
621: .text
622:
623: /*
624: * Map LCL(xxx) -> into simpler names
625: */
626:
627: #define V_acontext LCL(acontext)
628: #define V_acontext_locked LCL(acontext_locked)
629: #define V_alloc_pages LCL(alloc_pages)
630: #define V_bogus_func LCL(bogus_func)
631: #define V_bytes_alloc LCL(bytes_alloc)
632: #define V_cache_hits1 LCL(cache_hits1)
633: #define V_cache_hits2 LCL(cache_hits2)
634: #define V_cache_hits3 LCL(cache_hits3)
635: #define V_cnt LCL(cnt)
636: #define V_cnt_overflow LCL(cnt_overflow) /* NOTE(review): no LCL(cnt_overflow) is defined in the layout above; looks stale -- confirm unused */
637: #define V_check_funcs LCL(check_funcs)
638: #define V_dummy LCL(dummy)
639: #define V_dummy_overflow LCL(dummy_overflow) /* NOTE(review): no LCL(dummy_overflow) is defined in the layout above; looks stale -- confirm unused */
640: #define V_dummy_ptr LCL(dummy_ptr)
641: #define V_gprof_records LCL(gprof_records)
642: #define V_hash_num LCL(hash_num)
643: #define V_hash_ptr LCL(hash_ptr)
644: #define V_hash_search LCL(hash_search)
645: #define V_mcount_ptr_ptr LCL(mcount_ptr_ptr)
646: #define V_num_alloc LCL(num_alloc)
647: #define V_num_buffer LCL(num_buffer)
648: #define V_num_context LCL(num_context)
649: #define V_old_mcount LCL(old_mcount)
650: #define V_old_mcount_overflow LCL(old_mcount_overflow) /* NOTE(review): no LCL(old_mcount_overflow) is defined in the layout above; looks stale -- confirm unused */
651: #define V_overhead LCL(overhead)
652: #define V_page_size LCL(page_size)
653: #define V_prof_records LCL(prof_records)
654: #define V_recursive_alloc LCL(recursive_alloc)
655: #define V_wasted LCL(wasted)
656:
657: /*
658: * Load %ebx with the address of _profile_vars. On a multiprocessor, this
659: * loads the address of the current CPU's _profile_vars structure.
660: * For ELF shared libraries, rely on the fact that we won't need a GOT,
661: * except to load this pointer.
662: */
663:
664: #if defined (MACH_KERNEL) && NCPUS > 1
665: #define ASSEMBLER
666: #if AT386
667: #include <i386/AT386/mp.h>
668: #endif
669:
670: #if SQT
671: #include <i386/SQT/asm_macros.h>
672: #endif
673:
674: #ifndef CPU_NUMBER
675: #error "Cannot determine how to get CPU number"
676: #endif
677: /* CPU_NUMBER (from one of the headers above) puts the cpu number in %ebx, which then indexes the per-cpu pointer table. */
678: #define Vload CPU_NUMBER(%ebx); movl EXT(_profile_vars_cpus)(,%ebx,4),%ebx
679:
680: #else /* not kernel or not multiprocessor */
681: #define Vload Gload; Egaddr(%ebx,_profile_vars) /* Gload/Egaddr are defined outside this file; presumably GOT setup + address load -- confirm in asm.h */
682: #endif
683:
684:
685: /*
686: * Allocate some memory for profiling. This memory is guaranteed to
687: * be zero.
688: * %eax contains the memory size requested and will contain ptr on exit.
689: * %ebx contains the address of the appropriate profile_vars structure.
690: * %ecx is the number of the memory pool to allocate from (trashed on exit).
691: * %edx is trashed.
692: * %esi is preserved.
693: * %edi is preserved.
694: * %ebp is preserved.
695: */
696:
697: Entry(_profile_alloc_asm) /* in: %eax = size, %ebx = vars, %ecx = context #; out: %eax = pointer */
698: ENTER
699: pushl %esi /* preserved for the caller; used here as link ptr / result temp */
700: pushl %edi /* preserved for the caller; holds the context number below */
701:
702: movl %ecx,%edi /* move context number to saved reg */
703:
704: #if NO_RECURSIVE_ALLOC
705: movb $-1,%cl
706: xchgb %cl,V_recursive_alloc(%ebx) /* set the flag and fetch its previous value */
707: cmpb $0,%cl
708: je LCL(no_recurse) /* flag was clear: not a recursive call */
709:
710: int $3 /* allocator re-entered: breakpoint trap */
711:
712: .align ALIGN
713: LCL(no_recurse):
714: #endif
715:
716: leal V_acontext(%ebx,%edi,4),%ecx /* address of this context's head slot; treated as a block whose only field is A_next (offset 0) */
717:
718: /* Loop looking for a free allocation context. */
719: /* %eax = size, %ebx = vars addr, %ecx = ptr to allocation context to try */
720: /* %edi = context number */
721:
722: .align ALIGN
723: LCL(alloc_loop):
724: movl %ecx,%esi /* save ptr in case no more contexts */
725: movl A_next(%ecx),%ecx /* next context block */
726: cmpl $0,%ecx
727: je LCL(alloc_context) /* need to allocate a new context block */
728:
729: movl $-1,%edx
730: xchgl %edx,A_lock(%ecx) /* take the lock; %edx == 0 if context was available */
731:
732: #if DO_STATS
733: SDADDNEG(%edx,V_acontext_locked(%ebx)) /* subtracts the old lock value: +1 if it was -1, no change if 0 */
734: #endif
735:
736: cmpl $0,%edx
737: jne LCL(alloc_loop) /* go back if this context block is not available */
738:
739: /* Allocation context found (%ecx), now allocate. */
740: movl A_plist(%ecx),%edx /* pointer to current block */
741: cmpl $0,%edx /* first allocation? */
742: je LCL(alloc_new)
743:
744: cmpl %eax,M_nfree(%edx) /* see if we have enough space */
745: jb LCL(alloc_new) /* not enough space; unsigned test since both are byte counts */
746:
747: /* Allocate from local block (and common exit) */
748: /* %eax = bytes to allocate, %ebx = vars address, %ecx = context, %edx = memory block */
749: /* %edi = context number */
750:
751: .align ALIGN
752: LCL(alloc_ret):
753:
754: #if DO_STATS
755: SLOCK incl V_num_alloc(%ebx,%edi,4) /* update global counters */
756: SLOCK addl %eax,V_bytes_alloc(%ebx,%edi,4)
757: SLOCK subl %eax,V_wasted(%ebx,%edi,4) /* these bytes were counted as wasted when the block was created */
758: #endif
759:
760: movl M_ptr(%edx),%esi /* pointer return value */
761: subl %eax,M_nfree(%edx) /* decrement bytes remaining */
762: addl %eax,M_nalloc(%edx) /* increment bytes allocated */
763: incl M_num(%edx) /* increment # allocations */
764: addl %eax,M_ptr(%edx) /* advance pointer */
765: movl $0,A_lock(%ecx) /* unlock context block */
766: movl %esi,%eax /* return pointer */
767:
768: #if NO_RECURSIVE_ALLOC
769: movb $0,V_recursive_alloc(%ebx) /* clear the recursion flag set on entry */
770: #endif
771:
772: popl %edi
773: popl %esi
774: LEAVE0
775: ret /* return to the caller */
776:
777: /* Allocate space in whole number of pages */
778: /* %eax = bytes to allocate, %ebx = vars address, %ecx = context */
779: /* %edi = context number */
780:
781: .align ALIGN
782: LCL(alloc_new):
783: pushl %eax /* save regs */
784: pushl %ecx
785: movl V_page_size(%ebx),%edx
786: addl $(M_size-1),%eax /* add in overhead size & subtract 1 */
787: decl %edx /* page_size - 1 */
788: addl %edx,%eax /* round up to whole number of pages */
789: notl %edx /* ~(page_size - 1): a valid mask only if page_size is a power of two */
790: andl %edx,%eax
791: leal -M_size(%eax),%esi /* save allocation size */
792: pushl %eax /* argument to _profile_alloc_pages */
793: call *V_alloc_pages(%ebx) /* allocate some memory; the result in %eax is used unchecked */
794: addl $4,%esp /* pop off argument */
795:
796: #if DO_STATS
797: SLOCK addl %esi,V_wasted(%ebx,%edi,4) /* update global counters */
798: SLOCK addl $M_size,V_overhead(%ebx,%edi,4)
799: #endif
800:
801: popl %ecx /* context block */
802: movl %eax,%edx /* memory block pointer */
803: movl %esi,M_nfree(%edx) /* # free bytes */
804: addl $M_size,%eax /* bump past overhead */
805: movl A_plist(%ecx),%esi /* previous memory block or 0 */
806: movl %eax,M_first(%edx) /* first space available */
807: movl %eax,M_ptr(%edx) /* current address available */
808: movl %esi,M_next(%edx) /* next memory block allocated */
809: movl %edx,A_plist(%ecx) /* update current page list */
810: popl %eax /* user size request */
811: jmp LCL(alloc_ret) /* goto common return code */
812:
813: /* Allocate a context header in addition to memory block header + data */
814: /* %eax = bytes to allocate, %ebx = vars address, %esi = ptr to store context ptr */
815: /* %edi = context number */
816:
817: .align ALIGN
818: LCL(alloc_context):
819: pushl %eax /* save regs */
820: pushl %esi
821: movl V_page_size(%ebx),%edx
822: addl $(A_size+M_size-1),%eax /* add in overhead size & subtract 1 */
823: decl %edx /* page_size - 1 */
824: addl %edx,%eax /* round up to whole number of pages */
825: notl %edx
826: andl %edx,%eax
827: leal -A_size-M_size(%eax),%esi /* save allocation size */
828: pushl %eax /* argument to _profile_alloc_pages */
829: call *V_alloc_pages(%ebx) /* allocate some memory; the result in %eax is used unchecked */
830: addl $4,%esp /* pop off argument */
831:
832: #if DO_STATS
833: SLOCK incl V_num_context(%ebx,%edi,4) /* bump # context blocks */
834: SLOCK addl %esi,V_wasted(%ebx,%edi,4) /* update global counters */
835: SLOCK addl $(A_size+M_size),V_overhead(%ebx,%edi,4)
836: #endif
837:
838: movl %eax,%ecx /* context pointer */
839: leal A_size(%eax),%edx /* memory block pointer */
840: movl %esi,M_nfree(%edx) /* # free bytes */
841: addl $(A_size+M_size),%eax /* bump past overhead */
842: movl %eax,M_first(%edx) /* first space available */
843: movl %eax,M_ptr(%edx) /* current address available */
844: movl $0,M_next(%edx) /* next memory block allocated */
845: movl %edx,A_plist(%ecx) /* head of memory block list */
846: movl $1,A_lock(%ecx) /* set lock; NOTE(review): alloc_loop locks with -1, a contender swapping out this 1 would decrement acontext_locked -- confirm */
847: popl %esi /* ptr to store context block link */
848: movl %ecx,%eax /* context pointer temp */
849: xchgl %eax,A_next(%esi) /* link into chain; %eax = the link that was there before */
850: movl %eax,A_next(%ecx) /* add links in case of threading */
851: popl %eax /* user size request */
852: jmp LCL(alloc_ret) /* goto common return code */
853:
854: END(_profile_alloc_asm)
855:
856: /*
857: * C callable version of the profile memory allocator.
858: * extern void *_profile_alloc(struct profile_vars *, size_t, acontext_type_t);
859: */
860:
861: Entry(_profile_alloc) /* C entry: moves the stack arguments into the registers _profile_alloc_asm is called with */
862: ENTER
863: pushl %ebx /* %ebx is overwritten with the vars pointer below; restored before return */
864: movl 12+Estack(%esp),%eax /* arg 2: memory size */
865: movl 8+Estack(%esp),%ebx /* arg 1: profile_vars address */
866: addl $3,%eax /* round up to word boundary (finished by the andl below) */
867: movl 16+Estack(%esp),%ecx /* arg 3: which memory pool to allocate from */
868: andl $0xfffffffc,%eax /* clear the low two bits: size is now a multiple of 4 */
869: call EXT(_profile_alloc_asm) /* %eax = size, %ebx = vars, %ecx = pool; %eax is not touched again, so its value becomes the C return value */
870: popl %ebx
871: LEAVE0
872: ret
873: END(_profile_alloc)
874:
875:
876: /*
877: * Dummy mcount routine that just returns.
878: *
879: * +-------------------------------+
880: * | |
881: * | |
882: * | caller's caller stack, |
883: * | saved registers, params. |
884: * | |
885: * | |
886: * +-------------------------------+
887: * | caller's caller return addr. |
888: * +-------------------------------+
889: * esp --> | caller's return address |
890: * +-------------------------------+
891: *
892: * edx --> function unique label
893: */
894:
895: Entry(_dummy_mcount) /* no profiling work is done; only the V_dummy statistic is bumped when DO_STATS is set */
896: ENTER
897:
898: #if DO_STATS
899: pushl %ebx /* %ebx is used for the vars address; restored below */
900: MP_DISABLE_PREEMPTION(%ebx)
901: Vload /* load the profile vars address into %ebx (macro defined outside this chunk) */
902: SDINC(V_dummy(%ebx)) /* count this dummy call */
903: MP_ENABLE_PREEMPTION(%ebx)
904: popl %ebx
905: #endif
906:
907: LEAVE0
908: ret
909: END(_dummy_mcount)
910:
911:
912: /*
913: * Entry point for System V based profiling, count how many times each function
914: * is called. The function label is passed in %edx, and the top two words on
915: * the stack are the caller's address, and the caller's return address.
916: *
917: * +-------------------------------+
918: * | |
919: * | |
920: * | caller's caller stack, |
921: * | saved registers, params. |
922: * | |
923: * | |
924: * +-------------------------------+
925: * | caller's caller return addr. |
926: * +-------------------------------+
927: * esp --> | caller's return address |
928: * +-------------------------------+
929: *
930: * edx --> function unique label
931: *
932: * We don't worry about the possibility of two threads calling
933: * the same function for the first time simultaneously. If that
934: * happens, two records will be created, and one of the records'
935: * addresses will be stored in the function unique label (which
936: * is aligned by the compiler, so we don't have to watch out for
937: * crossing page/cache boundaries).
938: */
939:
940: Entry(_prof_mcount) /* in: %edx = address of the function unique label */
941: ENTER
942:
943: #if DO_STATS
944: pushl %ebx /* %ebx holds the vars address from here on; popped on every exit path */
945: MP_DISABLE_PREEMPTION(%ebx)
946: Vload
947: SDINC(V_cnt(%ebx)) /* bump profile call counter */
948: #endif
949:
950: movl (%edx),%eax /* initialized? */
951: cmpl $0,%eax
952: je LCL(pnew) /* label still zero: no record yet, go allocate one */
953:
954: DINC2(P_count(%eax),P_overflow(%eax)) /* bump function count (double precision) */
955:
956: #if DO_STATS
957: MP_ENABLE_PREEMPTION(%ebx)
958: popl %ebx
959: #endif
960:
961: LEAVE0
962: ret
963:
964: .align ALIGN
965: LCL(pnew): /* first call for this label: allocate and initialize a record */
966:
967: #if !DO_STATS
968: pushl %ebx /* without stats %ebx was not set up at entry; the allocator call below needs it */
969: MP_DISABLE_PREEMPTION(%ebx)
970: Vload
971: #endif
972:
973: SLOCK incl V_prof_records(%ebx) /* one more prof record */
974: pushl %edx /* keep the label address across the call */
975: movl $P_size,%eax /* allocation size */
976: movl $C_prof,%ecx /* allocation pool */
977: call EXT(_profile_alloc_asm) /* allocate a new record */
978: popl %edx
979:
980: movl Estack+4(%esp),%ecx /* caller's address (the +4 skips the saved %ebx) */
981: movl %ecx,P_addr(%eax) /* record which function this entry counts */
982: movl $1,P_count(%eax) /* call count */
983: xchgl %eax,(%edx) /* update function header */
984: MP_ENABLE_PREEMPTION(%ebx) /* pairs with the disable done at entry (DO_STATS) or at pnew (!DO_STATS) */
985: popl %ebx
986: LEAVE0
987: ret
988:
989: END(_prof_mcount)
990:
991:
992: /*
993: * Entry point for BSD based graph profiling, count how many times each unique
994: * call graph (caller + callee) is called. The function label is passed in
995: * %edx, and the top two words on the stack are the caller's address, and the
996: * caller's return address.
997: *
998: * +-------------------------------+
999: * | |
1000: * | |
1001: * | caller's caller stack, |
1002: * | saved registers, params. |
1003: * | |
1004: * | |
1005: * +-------------------------------+
1006: * | caller's caller return addr. |
1007: * +-------------------------------+
1008: * esp --> | caller's return address |
1009: * +-------------------------------+
1010: *
1011: * edx --> function unique label
1012: *
1013: * We don't worry about the possibility of two threads calling the same
1014: * function simultaneously. If that happens, two records will be created, and
1015: * one of the records' addresses will be stored in the function unique label
1016: * (which is aligned by the compiler).
1017: *
1018: * By design, the gprof header is not locked. Each of the cache pointers is
1019: * always a valid pointer (possibly to a null record), and if another thread
1020: * comes in and modifies the pointer, it does so atomically with a simple store.
1021: * Since all arcs are in the hash table, the caches are just to avoid doing
1022: * a multiplication in the common case, and if they don't match, the arcs will
1023: * still be found.
1024: */
1025:
1026: Entry(_gprof_mcount) /* in: %edx = address of the function unique label */
1027:
1028: ENTER
1029: movl Estack+4(%esp),%ecx /* caller's caller address */
1030:
1031: #if DO_STATS
1032: pushl %ebx /* %ebx holds the vars address until the final popl on each exit path */
1033: MP_DISABLE_PREEMPTION(%ebx)
1034: Vload
1035: SDINC(V_cnt(%ebx)) /* bump profile call counter (double int) */
1036: #endif
1037:
1038: movl (%edx),%eax /* Gprof header allocated? */
1039: cmpl $0,%eax
1040: je LCL(gnew) /* skip if first call */
1041:
1042: DINC2(H_prof+P_count(%eax),H_prof+P_overflow(%eax)) /* bump function count */
1043:
1044: /* See if this call arc is the same as the last time */
1045: MARK(_gprof_mcount_cache1)
1046: movl H_cache_ptr(%eax),%edx /* last arc searched */
1047: cmpl %ecx,G_frompc(%edx) /* skip if not equal */
1048: jne LCL(gcache2)
1049:
1050: /* Same as last time, increment and return */
1051:
1052: DINC2(G_count(%edx),G_overflow(%edx)) /* bump arc count */
1053:
1054: #if DO_STATS
1055: SDINC(V_cache_hits1(%ebx)) /* update counter */
1056: MP_ENABLE_PREEMPTION(%ebx)
1057: popl %ebx
1058: #endif
1059:
1060: LEAVE0
1061: ret
1062:
1063: /* Search second cache entry */
1064: /* %eax = gprof func header, %ebx = vars address if DO_STATS, %ecx = caller's caller */
1065: /* %edx = first arc searched */
1066: /* %ebx if DO_STATS pushed on stack */
1067:
1068: .align ALIGN
1069: MARK(_gprof_mcount_cache2)
1070: LCL(gcache2):
1071: pushl %esi /* get a saved register */
1072: movl H_cache_ptr+4(%eax),%esi /* 2nd arc to be searched */
1073: cmpl %ecx,G_frompc(%esi) /* skip if not equal */
1074: jne LCL(gcache3)
1075:
1076: /* Element found, increment, reset last arc searched and return */
1077:
1078: DINC2(G_count(%esi),G_overflow(%esi)) /* bump arc count */
1079:
1080: movl %esi,H_cache_ptr+0(%eax) /* swap 1st and 2nd cached arcs */
1081: popl %esi
1082: movl %edx,H_cache_ptr+4(%eax) /* old 1st arc becomes the 2nd */
1083:
1084: #if DO_STATS
1085: SDINC(V_cache_hits2(%ebx)) /* update counter */
1086: MP_ENABLE_PREEMPTION(%ebx)
1087: popl %ebx
1088: #endif
1089:
1090: LEAVE0
1091: ret
1092:
1093: /* Search third cache entry */
1094: /* %eax = gprof func header, %ebx = vars address if DO_STATS, %ecx = caller's caller */
1095: /* %edx = first arc searched, %esi = second arc searched */
1096: /* %esi, %ebx if DO_STATS pushed on stack */
1097:
1098: .align ALIGN
1099: MARK(_gprof_mcount_cache3)
1100: LCL(gcache3):
1101: pushl %edi /* second saved register, popped on every exit path */
1102: movl H_cache_ptr+8(%eax),%edi /* 3rd arc to be searched */
1103: cmpl %ecx,G_frompc(%edi) /* skip if not equal */
1104: jne LCL(gnocache)
1105:
1106: /* Element found, increment, reset last arc searched and return */
1107:
1108: DINC2(G_count(%edi),G_overflow(%edi)) /* bump arc count */
1109:
1110: movl %edi,H_cache_ptr+0(%eax) /* make this 1st cached arc */
1111: movl %esi,H_cache_ptr+8(%eax) /* old 2nd arc becomes the 3rd */
1112: movl %edx,H_cache_ptr+4(%eax) /* old 1st arc becomes the 2nd */
1113: popl %edi
1114: popl %esi
1115:
1116: #if DO_STATS
1117: SDINC(V_cache_hits3(%ebx)) /* update counter */
1118: MP_ENABLE_PREEMPTION(%ebx)
1119: popl %ebx
1120: #endif
1121:
1122: LEAVE0
1123: ret
1124:
1125: /* No function context, allocate a new context */
1126: /* %ebx is the variables address if DO_STATS */
1127: /* %ecx is the caller's caller's address */
1128: /* %edx is the unique function pointer */
1129: /* %ebx if DO_STATS pushed on stack */
1130:
1131: .align ALIGN
1132: MARK(_gprof_mcount_new)
1133: LCL(gnew):
1134: pushl %esi /* same push order as the gcache2/gcache3 path, so gnocache sees one stack layout */
1135: pushl %edi
1136:
1137: #if !DO_STATS
1138: pushl %ebx /* Address of vars needed for alloc */
1139: MP_DISABLE_PREEMPTION(%ebx)
1140: Vload /* stats already loaded address */
1141: #endif
1142:
1143: SLOCK incl V_prof_records(%ebx) /* one more function record */
1144: movl %edx,%esi /* save unique function ptr */
1145: movl %ecx,%edi /* and caller's caller address */
1146: movl $H_size,%eax /* memory block size */
1147: movl $C_gfunc,%ecx /* gprof function header memory pool */
1148: call EXT(_profile_alloc_asm) /* %eax = new function header */
1149:
1150: movl V_hash_ptr(%ebx),%ecx /* copy hash_ptr to func header */
1151: movl V_dummy_ptr(%ebx),%edx /* dummy cache entry */
1152: movl %ecx,H_hash_ptr(%eax)
1153: movl %edx,H_cache_ptr+0(%eax) /* store dummy cache ptrs */
1154: movl %edx,H_cache_ptr+4(%eax)
1155: movl %edx,H_cache_ptr+8(%eax)
1156: movl %esi,H_unique_ptr(%eax) /* remember function unique ptr */
1157: movl Estack+12(%esp),%ecx /* caller's address (three words are pushed here in either configuration) */
1158: movl $1,H_prof+P_count(%eax) /* function called once so far */
1159: movl %ecx,H_prof+P_addr(%eax) /* set up prof information */
1160: movl %eax,(%esi) /* update context block address */
1161: movl %edi,%ecx /* caller's caller address */
1162: movl %edx,%esi /* 2nd cached arc */
1163:
1164: #if !DO_STATS
      MP_ENABLE_PREEMPTION(%ebx) /* pair with the MP_DISABLE_PREEMPTION at gnew; the vars address is no longer needed */
1165: popl %ebx
1166: #endif
1167:
1168: /* Fall through to add element to the hash table. This may involve */
1169: /* searching a few hash table elements that don't need to be searched */
1170: /* since we have a new element, but it allows the hash table function */
1171: /* to be specified in only one place */
1172:
1173: /* Didn't find entry in cache, search the global hash table */
1174: /* %eax = gprof func header, %ebx = vars address if DO_STATS */
1175: /* %ecx = caller's caller */
1176: /* %edx, %esi = cached arcs that were searched */
1177: /* %edi, %esi, %ebx if DO_STATS pushed on stack */
1178:
1179: .align ALIGN
1180: MARK(_gprof_mcount_hash)
1181: LCL(gnocache):
1182:
1183: pushl %esi /* save 2nd arc searched */
1184: pushl %edx /* save 1st arc searched */
1185: movl %eax,%esi /* save gprof func header */
1186:
1187: #if DO_STATS
1188: SDINC(V_hash_num(%ebx)) /* count one hash-table lookup */
1189: movl Estack+20(%esp),%edi /* caller's address */
1190: #else
1191: movl Estack+16(%esp),%edi /* caller's address */
1192: #endif
1193: movl %ecx,%eax /* caller's caller address */
1194: imull %edi,%eax /* multiply to get hash */
1195: movl H_hash_ptr(%esi),%edx /* hash pointer */
1196: shrl $GPROF_HASH_SHIFT,%eax /* eliminate low order bits */
1197: andl $GPROF_HASH_MASK,%eax /* mask to get hash value */
1198: leal 0(%edx,%eax,4),%eax /* pointer to hash bucket */
1199: movl %eax,%edx /* save hash bucket address */
1200:
1201: /* %eax = old arc, %ebx = vars address if DO_STATS, %ecx = caller's caller */
1202: /* %edx = hash bucket address, %esi = gfunc ptr, %edi = caller's addr */
1203: /* 2 old arcs, %edi, %esi, %ebx if DO_STATS pushed on stack */
1204:
1205: .align ALIGN
1206: LCL(ghash):
1207: movl G_next(%eax),%eax /* get next hash element */
1208: cmpl $0,%eax /* end of line? */
1209: je LCL(ghashnew) /* skip if allocate new hash */
1210:
1211: #if DO_STATS
1212: SDINC(V_hash_search(%ebx)) /* count one chain element examined */
1213: #endif
1214:
1215: cmpl G_selfpc(%eax),%edi /* loop back if not one we want */
1216: jne LCL(ghash)
1217:
1218: cmpl G_frompc(%eax),%ecx /* loop back if not one we want */
1219: jne LCL(ghash)
1220:
1221: /* Found an entry, increment count, set up for caching, and return */
1222: /* %eax = arc, %ebx = vars address if DO_STATS, %esi = func header */
1223: /* 2 old arcs, %edi, %esi, %ebx if DO_STATS pushed on stack */
1224:
1225: DINC2(G_count(%eax),G_overflow(%eax)) /* bump arc count */
1226:
1227: popl %ecx /* previous 1st arc searched */
1228: movl %eax,H_cache_ptr+0(%esi) /* this element is now 1st arc */
1229: popl %edi /* previous 2nd arc searched */
1230: movl %ecx,H_cache_ptr+4(%esi) /* new 2nd arc to be searched */
1231: movl %edi,H_cache_ptr+8(%esi) /* new 3rd arc to be searched */
1232: popl %edi
1233: popl %esi
1234:
1235: #if DO_STATS
1236: MP_ENABLE_PREEMPTION(%ebx)
1237: popl %ebx
1238: #endif
1239:
1240: LEAVE0
1241: ret /* return to user */
1242:
1243: /* Allocate new arc */
1244: /* %eax = old arc, %ebx = vars address if DO_STATS, %ecx = caller's caller */
1245: /* %edx = hash bucket address, %esi = gfunc ptr, %edi = caller's addr */
1246: /* 2 old arcs, %edi, %esi, %ebx if DO_STATS pushed on stack */
1247:
1248: .align ALIGN
1249: MARK(_gprof_mcount_hashnew)
1250: LCL(ghashnew):
1251:
1252: #if !DO_STATS
1253: pushl %ebx /* load address of vars if we haven't */
1254: MP_DISABLE_PREEMPTION(%ebx)
1255: Vload /* already done so */
1256: #endif
1257:
1258: SLOCK incl V_gprof_records(%ebx) /* one more arc record */
1259: pushl %edx /* keep the hash bucket address across the call */
1260: movl %ecx,%edi /* save caller's caller */
1261: movl $G_size,%eax /* arc size */
1262: movl $C_gprof,%ecx /* gprof memory pool */
1263: call EXT(_profile_alloc_asm) /* %eax = new arc */
1264: popl %edx
1265:
1266: movl $1,G_count(%eax) /* set call count */
1267: movl Estack+20(%esp),%ecx /* caller's address */
1268: movl %edi,G_frompc(%eax) /* caller's caller */
1269: movl %ecx,G_selfpc(%eax)
1270:
1271: #if !DO_STATS
      MP_ENABLE_PREEMPTION(%ebx) /* pair with the MP_DISABLE_PREEMPTION at ghashnew before dropping the vars address */
1272: popl %ebx /* release %ebx if no stats */
1273: #endif
1274:
1275: movl (%edx),%ecx /* first hash bucket */
1276: movl %ecx,G_next(%eax) /* update link */
1277: movl %eax,%ecx /* copy for xchgl */
1278: xchgl %ecx,(%edx) /* add to hash linked list */
1279: movl %ecx,G_next(%eax) /* update in case list changed */
1280:
1281: popl %ecx /* previous 1st arc searched */
1282: popl %edi /* previous 2nd arc searched */
1283: movl %eax,H_cache_ptr+0(%esi) /* this element is now 1st arc */
1284: movl %ecx,H_cache_ptr+4(%esi) /* new 2nd arc to be searched */
1285: movl %edi,H_cache_ptr+8(%esi) /* new 3rd arc to be searched */
1286:
1287: popl %edi
1288: popl %esi
1289:
1290: #if DO_STATS
1291: MP_ENABLE_PREEMPTION(%ebx)
1292: popl %ebx
1293: #endif
1294:
1295: LEAVE0
1296: ret /* return to user */
1297:
1298: END(_gprof_mcount)
1299:
1300:
1301: /*
1302: * This function assumes that neither the caller nor its caller
1303: * has omitted the frame pointer, which is needed to get the caller's
1304: * caller. The stack looks like the following at the time of the call:
1305: *
1306: * +-------------------------------+
1307: * | |
1308: * | |
1309: * | caller's caller stack, |
1310: * | saved registers, params. |
1311: * | |
1312: * | |
1313: * +-------------------------------+
1314: * | caller's caller return addr. |
1315: * +-------------------------------+
1316: * fp --> | previous frame pointer |
1317: * +-------------------------------+
1318: * | |
1319: * | caller's stack, saved regs, |
1320: * | params. |
1321: * | |
1322: * +-------------------------------+
1323: * sp --> | caller's return address |
1324: * +-------------------------------+
1325: *
1326: * Recent versions of the compiler put the address of the pointer
1327: * sized word in %edx. Previous versions did not, but this code
1328: * does not support them.
1329: */
1330:
1331: /*
1332: * Note that OSF/rose blew defining _mcount, since it prepends leading
1333: * underscores, and _mcount didn't have a second leading underscore. However,
1334: * some of the kernel/server functions 'know' that mcount has a leading
1335: * underscore, so we satisfy both camps.
1336: */
1337:
1338: #if OLD_MCOUNT
1339: .globl mcount
1340: .globl _mcount
1341: ELF_FUNC(mcount)
1342: ELF_FUNC(_mcount)
1343: .align FALIGN
1344: _mcount: /* both spellings label the same entry point */
1345: mcount:
1346:
1347: pushl %ebx /* %ebx is needed for the vars address; restored before the jump */
1348: MP_DISABLE_PREEMPTION(%ebx)
1349: Vload
1350:
1351: #if DO_STATS
1352: SDINC(V_old_mcount(%ebx)) /* count calls that came in through this old-style entry */
1353: #endif
1354:
1355: /* In calling the functions, we will actually leave 1 extra word on the */
1356: /* top of the stack, but generated code will not notice, since the function */
1357: /* uses a frame pointer */
1358:
1359: movl V_mcount_ptr_ptr(%ebx),%ecx /* address of mcount_ptr */
1360: MP_ENABLE_PREEMPTION(%ebx) /* %ecx must still hold the pointer after this: it is used by the jmp below */
1361: popl %ebx
1362: movl 4(%ebp),%eax /* caller's caller return address */
1363: xchgl %eax,(%esp) /* push & get return address */
1364: pushl %eax /* push return address */
1365: jmp *(%ecx) /* go to profile the function */
1366:
1367: End(mcount)
1368: End(_mcount)
1369: #endif
1370:
1371:
1372: #if !defined(KERNEL) && !defined(MACH_KERNEL)
1373:
1374: /*
1375: * Convert a 64-bit integer to a string.
1376: * Arg #1 is a pointer to a string (at least 24 bytes) or NULL
1377: * Arg #2 is the low part of the 64-bit integer.
1378: * Arg #3 is the high part of the 64-bit integer.
1379: */
1380:
1381: Entry(_profile_cnt_to_decimal) /* returns in %eax a pointer to the first digit of the NUL-terminated decimal string */
1382: ENTER
1383: pushl %ebx
1384: pushl %esi
1385: pushl %edi
1386: movl Estack+16(%esp),%ebx /* pointer or null */
1387: movl Estack+20(%esp),%edi /* low part of number */
1388: movl $10,%ecx /* divisor */
1389: cmpl $0,%ebx /* skip if pointer ok */
1390: jne LCL(cvt_nonnull)
1391:
1392: MP_DISABLE_PREEMPTION(%ebx) /* only this NULL-pointer path disables preemption */
1393: Vload /* get _profile_vars address */
1394: leal V_num_buffer(%ebx),%ebx /* temp buffer to use */
1395:
1396: .align ALIGN
1397: LCL(cvt_nonnull):
1398: addl $(N_digit-1),%ebx /* point string at end */
1399: movb $0,0(%ebx) /* null terminate string */
1400:
1401: #if OVERFLOW
1402: movl Estack+24(%esp),%esi /* high part of number */
1403: cmpl $0,%esi /* any thing left in high part? */
1404: je LCL(cvt_low)
1405:
1406: .align ALIGN
1407: LCL(cvt_high):
1408: movl %esi,%eax /* calculate high/10 & high%10 */
1409: xorl %edx,%edx
1410: divl %ecx
1411: movl %eax,%esi
1412:
1413: movl %edi,%eax /* calculate (low + (high%10)*2^32) / 10 */
1414: divl %ecx /* %edx deliberately still holds high%10 from the divide above */
1415: movl %eax,%edi
1416:
1417: decl %ebx /* decrement string pointer */
1418: addl $48,%edx /* convert from 0..9 -> '0'..'9' */
1419: movb %dl,0(%ebx) /* store digit in string */
1420: cmpl $0,%esi /* any thing left in high part? */
1421: jne LCL(cvt_high)
1422:
1423: #endif /* OVERFLOW */
1424:
1425: .align ALIGN
1426: LCL(cvt_low):
1427: movl %edi,%eax /* get low part into %eax */
1428:
1429: .align ALIGN
1430: LCL(cvt_low2):
1431: xorl %edx,%edx /* 0 */
1432: divl %ecx /* calculate next digit */
1433: decl %ebx /* decrement string pointer */
1434: addl $48,%edx /* convert from 0..9 -> '0'..'9' */
1435: movb %dl,0(%ebx) /* store digit in string */
1436: cmpl $0,%eax /* any more digits to convert? */
1437: jne LCL(cvt_low2) /* tested after the store, so a value of 0 still yields one digit */
1438:
1439: movl %ebx,%eax /* return value */
1440: popl %edi
1441: popl %esi
      cmpl $0,Estack+8(%esp) /* was the caller's buffer pointer NULL? (one saved word is left on the stack) */
      jne LCL(cvt_ret) /* no: preemption was never disabled on that path, so do not enable it */
1442: MP_ENABLE_PREEMPTION(%ebx) /* pairs with the disable on the NULL-pointer path; %eax must survive this */
      LCL(cvt_ret):
1443: popl %ebx
1444: LEAVE0
1445: ret
1446:
1447: END(_profile_cnt_to_decimal)
1448:
1449: #endif
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.