1
0
Fork 0
mirror of https://github.com/tobast/libunwind-eh_elf.git synced 2024-06-25 02:41:45 +02:00

Performance optimisations for fast trace.

Insert static branch prediction predicates in useful places and avoid
unnecessary code in the hottest paths. Bypass unnecessary indirect
calls, in particular to access_mem(), when known to be safe.
This commit is contained in:
Lassi Tuura 2011-04-17 20:33:09 -07:00 committed by Arun Sharma
parent e2962af9d3
commit ae5c1f2adf
10 changed files with 88 additions and 62 deletions

View file

@ -301,6 +301,14 @@ struct elf_image
size_t size; /* (file-) size of the image */
};
/* Placeholder that an architecture may override to read memory quickly
when the access is known to be local to the process and to need no
validation. A suitable override will improve unw_tdep_trace()
performance in particular.

This default definition does not use VALIDATE: it reads the word at
ADDR through dwarf_get() into TO and stores dwarf_get()'s return value
in RET. */
#define ACCESS_MEM_FAST(ret,validate,cur,addr,to) \
do { (ret) = dwarf_get ((cur), DWARF_MEM_LOC ((cur), (addr)), &(to)); } \
while (0)
#include "tdep/libunwind_i.h"
#ifndef tdep_get_func_addr

View file

@ -110,10 +110,10 @@ dwarf_get_uc(const struct dwarf_cursor *cursor)
# define DWARF_LOC(r, t) ((dwarf_loc_t) { .val = (r) })
# define DWARF_IS_REG_LOC(l) 0
# define DWARF_REG_LOC(c,r) (DWARF_LOC((unw_word_t) \
tdep_uc_addr(dwarf_get_uc(c), (r)), 0))
x86_64_r_uc_addr(dwarf_get_uc(c), (r)), 0))
# define DWARF_MEM_LOC(c,m) DWARF_LOC ((m), 0)
# define DWARF_FPREG_LOC(c,r) (DWARF_LOC((unw_word_t) \
tdep_uc_addr(dwarf_get_uc(c), (r)), 0))
x86_64_r_uc_addr(dwarf_get_uc(c), (r)), 0))
#else /* !UNW_LOCAL_ONLY */
# define DWARF_LOC_TYPE_FP (1 << 0)
@ -184,7 +184,6 @@ dwarf_put (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t val)
/* Platforms that support UNW_INFO_FORMAT_TABLE need to define
tdep_search_unwind_table. */
#define tdep_search_unwind_table dwarf_search_unwind_table
#define tdep_uc_addr UNW_ARCH_OBJ(uc_addr)
#define tdep_get_elf_image UNW_ARCH_OBJ(get_elf_image)
#define tdep_access_reg UNW_OBJ(access_reg)
#define tdep_access_fpreg UNW_OBJ(access_fpreg)
@ -199,6 +198,7 @@ dwarf_put (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t val)
#endif
#define tdep_stash_frame UNW_OBJ(stash_frame)
#define tdep_trace UNW_OBJ(tdep_trace)
#define x86_64_r_uc_addr UNW_OBJ(r_uc_addr)
#ifdef UNW_LOCAL_ONLY
# define tdep_find_proc_info(c,ip,n) \
@ -226,7 +226,7 @@ extern void tdep_init_mem_validate (void);
extern int tdep_search_unwind_table (unw_addr_space_t as, unw_word_t ip,
unw_dyn_info_t *di, unw_proc_info_t *pi,
int need_unwind_info, void *arg);
extern void *tdep_uc_addr (ucontext_t *uc, int reg);
extern void *x86_64_r_uc_addr (ucontext_t *uc, int reg);
extern int tdep_get_elf_image (struct elf_image *ei, pid_t pid, unw_word_t ip,
unsigned long *segbase, unsigned long *mapoff,
char *path, size_t pathlen);

View file

@ -39,7 +39,7 @@ slow_backtrace (void **buffer, int size, unw_context_t *uc)
unw_word_t ip;
int n = 0;
if (unw_init_local (&cursor, uc) < 0)
if (unlikely (unw_init_local (&cursor, uc) < 0))
return 0;
while (unw_step (&cursor) > 0)
@ -63,10 +63,10 @@ unw_backtrace (void **buffer, int size)
tdep_getcontext_trace (&uc);
if (unw_init_local (&cursor, &uc) < 0)
if (unlikely (unw_init_local (&cursor, &uc) < 0))
return 0;
if (tdep_trace (&cursor, buffer, &n) < 0)
if (unlikely (tdep_trace (&cursor, buffer, &n) < 0))
{
unw_getcontext (&uc);
return slow_backtrace (buffer, size, &uc);

View file

@ -47,16 +47,6 @@ static struct unw_addr_space local_addr_space;
PROTECTED unw_addr_space_t unw_local_addr_space = &local_addr_space;
# ifdef UNW_LOCAL_ONLY
/* Thin wrapper: forwards UC and REG to x86_64_r_uc_addr() and returns
its result unchanged. */
HIDDEN void *
tdep_uc_addr (ucontext_t *uc, int reg)
{
return x86_64_r_uc_addr (uc, reg);
}
# endif /* UNW_LOCAL_ONLY */
HIDDEN unw_dyn_info_list_t _U_dyn_info_list;
/* XXX fix me: there is currently no way to locate the dyn-info list
@ -168,7 +158,7 @@ static int
access_mem (unw_addr_space_t as, unw_word_t addr, unw_word_t *val, int write,
void *arg)
{
if (write)
if (unlikely (write))
{
Debug (16, "mem[%016lx] <- %lx\n", addr, *val);
*(unw_word_t *) addr = *val;
@ -177,7 +167,8 @@ access_mem (unw_addr_space_t as, unw_word_t addr, unw_word_t *val, int write,
{
/* validate address */
const struct cursor *c = (const struct cursor *)arg;
if (c && c->validate && validate_mem(addr))
if (likely (c != 0) && unlikely (c->validate)
&& unlikely (validate_mem (addr)))
return -1;
*val = *(unw_word_t *) addr;
Debug (16, "mem[%016lx] -> %lx\n", addr, *val);

View file

@ -43,7 +43,7 @@ unw_init_local (unw_cursor_t *cursor, ucontext_t *uc)
{
struct cursor *c = (struct cursor *) cursor;
if (tdep_needs_initialization)
if (unlikely (tdep_needs_initialization))
tdep_init ();
Debug (1, "(cursor=%p)\n", c);

View file

@ -154,6 +154,7 @@ unw_handle_signal_frame (unw_cursor_t *cursor)
HIDDEN void *
x86_64_r_uc_addr (ucontext_t *uc, int reg)
{
/* NOTE: common_init() in init.h inlines these for fast path access. */
void *addr;
switch (reg)

View file

@ -106,6 +106,7 @@ unw_handle_signal_frame (unw_cursor_t *cursor)
HIDDEN void *
x86_64_r_uc_addr (ucontext_t *uc, int reg)
{
/* NOTE: common_init() in init.h inlines these for fast path access. */
void *addr;
switch (reg)

View file

@ -92,7 +92,7 @@ trace_cache_buckets (void)
unw_tdep_frame_t *frames = mempool_alloc(&trace_frame_pool);
size_t i;
if (likely (frames != 0))
if (likely(frames != 0))
for (i = 0; i < (1u << HASH_LOW_BITS); ++i)
frames[i] = empty_frame;
@ -142,7 +142,7 @@ trace_cache_expand (unw_trace_cache_t *cache)
old_size = (1u << cache->log_frame_vecs);
new_size = cache->log_frame_vecs + 2;
for (i = old_size; i < (1u << new_size); ++i)
if (unlikely (! (cache->frames[i] = trace_cache_buckets())))
if (unlikely(! (cache->frames[i] = trace_cache_buckets())))
{
Debug(5, "failed to expand cache to 2^%lu hash bucket sets\n", new_size);
for (j = old_size; j < i; ++j)
@ -237,10 +237,10 @@ trace_init_addr (unw_tdep_frame_t *f,
d->loc[UNW_X86_64_RSP] = DWARF_REG_LOC (d, UNW_X86_64_RSP);
c->frame_info = *f;
if (dwarf_put (d, d->loc[UNW_X86_64_RIP], rip) >= 0
&& dwarf_put (d, d->loc[UNW_X86_64_RBP], rbp) >= 0
&& dwarf_put (d, d->loc[UNW_X86_64_RSP], rsp) >= 0
&& (ret = unw_step (cursor)) >= 0)
if (likely(dwarf_put (d, d->loc[UNW_X86_64_RIP], rip) >= 0)
&& likely(dwarf_put (d, d->loc[UNW_X86_64_RBP], rbp) >= 0)
&& likely(dwarf_put (d, d->loc[UNW_X86_64_RSP], rsp) >= 0)
&& likely((ret = unw_step (cursor)) >= 0))
*f = c->frame_info;
/* If unw_step() stopped voluntarily, remember that, even if it
@ -290,14 +290,14 @@ trace_lookup (unw_cursor_t *cursor,
addr = frame->virtual_address;
/* Return if we found the address. */
if (addr == rip)
if (likely(addr == rip))
{
Debug (4, "found address after %ld steps\n", i);
return frame;
}
/* If slot is empty, reuse it. */
if (! addr)
if (likely(! addr))
break;
/* Linear probe to next slot candidate, step = 1. */
@ -310,9 +310,9 @@ trace_lookup (unw_cursor_t *cursor,
it's free or collides. Note that hash expansion drops previous
contents; further lookups will refill the hash. */
Debug (4, "updating slot %lu after %ld steps, replacing 0x%lx\n", slot, i, addr);
if (unlikely (addr || cache->used >= cache_size / 2))
if (unlikely(addr || cache->used >= cache_size / 2))
{
if (unlikely (trace_cache_expand (cache) < 0))
if (unlikely(trace_cache_expand (cache) < 0))
return 0;
cache_size = 1u << (HASH_LOW_BITS + cache->log_frame_vecs);
@ -404,7 +404,7 @@ tdep_trace (unw_cursor_t *cursor, void **buffer, int *size)
int ret;
/* Check input parameters. */
if (! cursor || ! buffer || ! size || (maxdepth = *size) <= 0)
if (unlikely(! cursor || ! buffer || ! size || (maxdepth = *size) <= 0))
return -UNW_EINVAL;
Debug (1, "begin ip 0x%lx cfa 0x%lx\n", d->ip, d->cfa);
@ -415,7 +415,7 @@ tdep_trace (unw_cursor_t *cursor, void **buffer, int *size)
/* Determine initial register values. */
rip = d->ip;
rsp = cfa = d->cfa;
if ((ret = dwarf_get (d, d->loc[UNW_X86_64_RBP], &rbp)) < 0)
if (unlikely((ret = dwarf_get (d, d->loc[UNW_X86_64_RBP], &rbp)) < 0))
{
Debug (1, "returning %d, rbp value not found\n", ret);
*size = 0;
@ -424,7 +424,7 @@ tdep_trace (unw_cursor_t *cursor, void **buffer, int *size)
}
/* Get frame cache. */
if (! (cache = trace_cache_get()))
if (unlikely(! (cache = trace_cache_get())))
{
Debug (1, "returning %d, cannot get trace cache\n", -UNW_ENOMEM);
*size = 0;
@ -450,7 +450,7 @@ tdep_trace (unw_cursor_t *cursor, void **buffer, int *size)
unw_tdep_frame_t *f = trace_lookup (cursor, cache, cfa, rip, rbp, rsp);
/* If we don't have information for this frame, give up. */
if (! f)
if (unlikely(! f))
{
ret = -UNW_ENOINFO;
break;
@ -481,9 +481,9 @@ tdep_trace (unw_cursor_t *cursor, void **buffer, int *size)
case UNW_X86_64_FRAME_STANDARD:
/* Advance standard traceable frame. */
cfa = (f->cfa_reg_rsp ? rsp : rbp) + f->cfa_reg_offset;
ret = dwarf_get (d, DWARF_MEM_LOC (d, cfa - 8), &rip);
if (ret >= 0 && f->rbp_cfa_offset != -1)
ret = dwarf_get (d, DWARF_MEM_LOC (d, cfa + f->rbp_cfa_offset), &rbp);
ACCESS_MEM_FAST(ret, c->validate, d, cfa - 8, rip);
if (likely(ret >= 0) && likely(f->rbp_cfa_offset != -1))
ACCESS_MEM_FAST(ret, c->validate, d, cfa + f->rbp_cfa_offset, rbp);
/* Don't bother reading RSP from DWARF, CFA becomes new RSP. */
rsp = cfa;
@ -497,13 +497,12 @@ tdep_trace (unw_cursor_t *cursor, void **buffer, int *size)
registers (ucontext) among other things. We know the info
is stored at some unknown constant offset off inner frame's
CFA. We determine the actual offset from DWARF unwind info. */
d->use_prev_instr = 0;
cfa = cfa + f->cfa_reg_offset;
ret = dwarf_get (d, DWARF_MEM_LOC (d, cfa + f->rbp_cfa_offset + dRIP), &rip);
if (ret >= 0)
ret = dwarf_get (d, DWARF_MEM_LOC (d, cfa + f->rbp_cfa_offset), &rbp);
if (ret >= 0)
ret = dwarf_get (d, DWARF_MEM_LOC (d, cfa + f->rsp_cfa_offset), &rsp);
ACCESS_MEM_FAST(ret, c->validate, d, cfa + f->rbp_cfa_offset + dRIP, rip);
if (likely(ret >= 0))
ACCESS_MEM_FAST(ret, c->validate, d, cfa + f->rbp_cfa_offset, rbp);
if (likely(ret >= 0))
ACCESS_MEM_FAST(ret, c->validate, d, cfa + f->rsp_cfa_offset, rsp);
/* Resume stack at signal restoration point. The stack is not
necessarily continuous here, especially with sigaltstack(). */
@ -524,8 +523,8 @@ tdep_trace (unw_cursor_t *cursor, void **buffer, int *size)
Debug (4, "new cfa 0x%lx rip 0x%lx rsp 0x%lx rbp 0x%lx\n",
cfa, rip, rsp, rbp);
/* If we failed on ended up somewhere bogus, stop. */
if (ret < 0 || rip < 0x4000)
/* If we failed or ended up somewhere bogus, stop. */
if (unlikely(ret < 0 || rip < 0x4000))
break;
/* Record this address in stack trace. We skipped the first address. */

View file

@ -27,28 +27,42 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
#include "unwind_i.h"
/* Avoid a trip to x86_64_r_uc_addr() for purely local initialisation.

REG_INIT_LOC(c, rlc, ruc) expands to the initial DWARF location of one
register for cursor C. RLC is the lower-case register name, pasted
onto the FreeBSD mcontext field prefix mc_; RUC is the upper-case
name, pasted onto REG_ for the Linux gregs index or onto UNW_X86_64_
for the generic DWARF_REG_LOC path. C is parenthesised in every
expansion so that any pointer expression, not only a plain identifier,
may be passed. */
#if defined UNW_LOCAL_ONLY && defined __linux
# define REG_INIT_LOC(c, rlc, ruc) \
  DWARF_LOC ((unw_word_t) &(c)->uc->uc_mcontext.gregs[REG_ ## ruc], 0)
#elif defined UNW_LOCAL_ONLY && defined __FreeBSD__
# define REG_INIT_LOC(c, rlc, ruc) \
  DWARF_LOC ((unw_word_t) &(c)->uc->uc_mcontext.mc_ ## rlc, 0)
#else
# define REG_INIT_LOC(c, rlc, ruc) \
  DWARF_REG_LOC (&(c)->dwarf, UNW_X86_64_ ## ruc)
#endif
static inline int
common_init (struct cursor *c, unsigned use_prev_instr)
{
int ret;
c->dwarf.loc[RAX] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_RAX);
c->dwarf.loc[RDX] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_RDX);
c->dwarf.loc[RCX] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_RCX);
c->dwarf.loc[RBX] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_RBX);
c->dwarf.loc[RSI] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_RSI);
c->dwarf.loc[RDI] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_RDI);
c->dwarf.loc[RBP] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_RBP);
c->dwarf.loc[RSP] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_RSP);
c->dwarf.loc[R8] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_R8);
c->dwarf.loc[R9] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_R9);
c->dwarf.loc[R10] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_R10);
c->dwarf.loc[R11] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_R11);
c->dwarf.loc[R12] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_R12);
c->dwarf.loc[R13] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_R13);
c->dwarf.loc[R14] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_R14);
c->dwarf.loc[R15] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_R15);
c->dwarf.loc[RIP] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_RIP);
c->dwarf.loc[RAX] = REG_INIT_LOC(c, rax, RAX);
c->dwarf.loc[RDX] = REG_INIT_LOC(c, rdx, RDX);
c->dwarf.loc[RCX] = REG_INIT_LOC(c, rcx, RCX);
c->dwarf.loc[RBX] = REG_INIT_LOC(c, rbx, RBX);
c->dwarf.loc[RSI] = REG_INIT_LOC(c, rsi, RSI);
c->dwarf.loc[RDI] = REG_INIT_LOC(c, rdi, RDI);
c->dwarf.loc[RBP] = REG_INIT_LOC(c, rbp, RBP);
c->dwarf.loc[RSP] = REG_INIT_LOC(c, rsp, RSP);
c->dwarf.loc[R8] = REG_INIT_LOC(c, r8, R8);
c->dwarf.loc[R9] = REG_INIT_LOC(c, r9, R9);
c->dwarf.loc[R10] = REG_INIT_LOC(c, r10, R10);
c->dwarf.loc[R11] = REG_INIT_LOC(c, r11, R11);
c->dwarf.loc[R12] = REG_INIT_LOC(c, r12, R12);
c->dwarf.loc[R13] = REG_INIT_LOC(c, r13, R13);
c->dwarf.loc[R14] = REG_INIT_LOC(c, r14, R14);
c->dwarf.loc[R15] = REG_INIT_LOC(c, r15, R15);
c->dwarf.loc[RIP] = REG_INIT_LOC(c, rip, RIP);
ret = dwarf_get (&c->dwarf, c->dwarf.loc[RIP], &c->dwarf.ip);
if (ret < 0)

View file

@ -65,6 +65,18 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
#define x86_64_r_uc_addr UNW_OBJ(r_uc_addr)
#define x86_64_sigreturn UNW_OBJ(sigreturn)
/* Bypass calls to access_mem() when known to be safe.

For local-only builds, replace any earlier ACCESS_MEM_FAST definition:
when VALIDATE is non-zero the word at ADDR is still read through
dwarf_get() and its return value is stored in RET; otherwise ADDR is
dereferenced directly as an unw_word_t into TO and RET is set to 0. */
#ifdef UNW_LOCAL_ONLY
# undef ACCESS_MEM_FAST
# define ACCESS_MEM_FAST(ret,validate,cur,addr,to) \
do { \
if (unlikely(validate)) \
(ret) = dwarf_get ((cur), DWARF_MEM_LOC ((cur), (addr)), &(to)); \
else \
(ret) = 0, (to) = *(unw_word_t *)(addr); \
} while (0)
#endif
extern void x86_64_local_addr_space_init (void);
extern int x86_64_local_resume (unw_addr_space_t as, unw_cursor_t *cursor,
void *arg);