mirror of
https://github.com/tobast/libunwind-eh_elf.git
synced 2025-02-16 18:21:41 +01:00
Performance optimisations for fast trace.
Insert static branch prediction predicates in useful places and avoid unnecessary code in the hottest paths. Bypass unnecessary indirect calls, in particular to access_mem(), when known to be safe.
This commit is contained in:
parent
e2962af9d3
commit
ae5c1f2adf
10 changed files with 88 additions and 62 deletions
|
@ -301,6 +301,14 @@ struct elf_image
|
|||
size_t size; /* (file-) size of the image */
|
||||
};
|
||||
|
||||
/* Provide a placeholder that an architecture can override for fast
|
||||
access to memory when the access is known not to need validation and
|
||||
is known to be local to the process. A suitable override will improve
|
||||
unw_tdep_trace() performance in particular. */
|
||||
#define ACCESS_MEM_FAST(ret,validate,cur,addr,to) \
|
||||
do { (ret) = dwarf_get ((cur), DWARF_MEM_LOC ((cur), (addr)), &(to)); } \
|
||||
while (0)
|
||||
|
||||
#include "tdep/libunwind_i.h"
|
||||
|
||||
#ifndef tdep_get_func_addr
|
||||
|
|
|
@ -110,10 +110,10 @@ dwarf_get_uc(const struct dwarf_cursor *cursor)
|
|||
# define DWARF_LOC(r, t) ((dwarf_loc_t) { .val = (r) })
|
||||
# define DWARF_IS_REG_LOC(l) 0
|
||||
# define DWARF_REG_LOC(c,r) (DWARF_LOC((unw_word_t) \
|
||||
tdep_uc_addr(dwarf_get_uc(c), (r)), 0))
|
||||
x86_64_r_uc_addr(dwarf_get_uc(c), (r)), 0))
|
||||
# define DWARF_MEM_LOC(c,m) DWARF_LOC ((m), 0)
|
||||
# define DWARF_FPREG_LOC(c,r) (DWARF_LOC((unw_word_t) \
|
||||
tdep_uc_addr(dwarf_get_uc(c), (r)), 0))
|
||||
x86_64_r_uc_addr(dwarf_get_uc(c), (r)), 0))
|
||||
#else /* !UNW_LOCAL_ONLY */
|
||||
|
||||
# define DWARF_LOC_TYPE_FP (1 << 0)
|
||||
|
@ -184,7 +184,6 @@ dwarf_put (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t val)
|
|||
/* Platforms that support UNW_INFO_FORMAT_TABLE need to define
|
||||
tdep_search_unwind_table. */
|
||||
#define tdep_search_unwind_table dwarf_search_unwind_table
|
||||
#define tdep_uc_addr UNW_ARCH_OBJ(uc_addr)
|
||||
#define tdep_get_elf_image UNW_ARCH_OBJ(get_elf_image)
|
||||
#define tdep_access_reg UNW_OBJ(access_reg)
|
||||
#define tdep_access_fpreg UNW_OBJ(access_fpreg)
|
||||
|
@ -199,6 +198,7 @@ dwarf_put (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t val)
|
|||
#endif
|
||||
#define tdep_stash_frame UNW_OBJ(stash_frame)
|
||||
#define tdep_trace UNW_OBJ(tdep_trace)
|
||||
#define x86_64_r_uc_addr UNW_OBJ(r_uc_addr)
|
||||
|
||||
#ifdef UNW_LOCAL_ONLY
|
||||
# define tdep_find_proc_info(c,ip,n) \
|
||||
|
@ -226,7 +226,7 @@ extern void tdep_init_mem_validate (void);
|
|||
extern int tdep_search_unwind_table (unw_addr_space_t as, unw_word_t ip,
|
||||
unw_dyn_info_t *di, unw_proc_info_t *pi,
|
||||
int need_unwind_info, void *arg);
|
||||
extern void *tdep_uc_addr (ucontext_t *uc, int reg);
|
||||
extern void *x86_64_r_uc_addr (ucontext_t *uc, int reg);
|
||||
extern int tdep_get_elf_image (struct elf_image *ei, pid_t pid, unw_word_t ip,
|
||||
unsigned long *segbase, unsigned long *mapoff,
|
||||
char *path, size_t pathlen);
|
||||
|
|
|
@ -39,7 +39,7 @@ slow_backtrace (void **buffer, int size, unw_context_t *uc)
|
|||
unw_word_t ip;
|
||||
int n = 0;
|
||||
|
||||
if (unw_init_local (&cursor, uc) < 0)
|
||||
if (unlikely (unw_init_local (&cursor, uc) < 0))
|
||||
return 0;
|
||||
|
||||
while (unw_step (&cursor) > 0)
|
||||
|
@ -63,10 +63,10 @@ unw_backtrace (void **buffer, int size)
|
|||
|
||||
tdep_getcontext_trace (&uc);
|
||||
|
||||
if (unw_init_local (&cursor, &uc) < 0)
|
||||
if (unlikely (unw_init_local (&cursor, &uc) < 0))
|
||||
return 0;
|
||||
|
||||
if (tdep_trace (&cursor, buffer, &n) < 0)
|
||||
if (unlikely (tdep_trace (&cursor, buffer, &n) < 0))
|
||||
{
|
||||
unw_getcontext (&uc);
|
||||
return slow_backtrace (buffer, size, &uc);
|
||||
|
|
|
@ -47,16 +47,6 @@ static struct unw_addr_space local_addr_space;
|
|||
|
||||
PROTECTED unw_addr_space_t unw_local_addr_space = &local_addr_space;
|
||||
|
||||
# ifdef UNW_LOCAL_ONLY
|
||||
|
||||
HIDDEN void *
|
||||
tdep_uc_addr (ucontext_t *uc, int reg)
|
||||
{
|
||||
return x86_64_r_uc_addr (uc, reg);
|
||||
}
|
||||
|
||||
# endif /* UNW_LOCAL_ONLY */
|
||||
|
||||
HIDDEN unw_dyn_info_list_t _U_dyn_info_list;
|
||||
|
||||
/* XXX fix me: there is currently no way to locate the dyn-info list
|
||||
|
@ -168,7 +158,7 @@ static int
|
|||
access_mem (unw_addr_space_t as, unw_word_t addr, unw_word_t *val, int write,
|
||||
void *arg)
|
||||
{
|
||||
if (write)
|
||||
if (unlikely (write))
|
||||
{
|
||||
Debug (16, "mem[%016lx] <- %lx\n", addr, *val);
|
||||
*(unw_word_t *) addr = *val;
|
||||
|
@ -177,7 +167,8 @@ access_mem (unw_addr_space_t as, unw_word_t addr, unw_word_t *val, int write,
|
|||
{
|
||||
/* validate address */
|
||||
const struct cursor *c = (const struct cursor *)arg;
|
||||
if (c && c->validate && validate_mem(addr))
|
||||
if (likely (c != 0) && unlikely (c->validate)
|
||||
&& unlikely (validate_mem (addr)))
|
||||
return -1;
|
||||
*val = *(unw_word_t *) addr;
|
||||
Debug (16, "mem[%016lx] -> %lx\n", addr, *val);
|
||||
|
|
|
@ -43,7 +43,7 @@ unw_init_local (unw_cursor_t *cursor, ucontext_t *uc)
|
|||
{
|
||||
struct cursor *c = (struct cursor *) cursor;
|
||||
|
||||
if (tdep_needs_initialization)
|
||||
if (unlikely (tdep_needs_initialization))
|
||||
tdep_init ();
|
||||
|
||||
Debug (1, "(cursor=%p)\n", c);
|
||||
|
|
|
@ -154,6 +154,7 @@ unw_handle_signal_frame (unw_cursor_t *cursor)
|
|||
HIDDEN void *
|
||||
x86_64_r_uc_addr (ucontext_t *uc, int reg)
|
||||
{
|
||||
/* NOTE: common_init() in init.h inlines these for fast path access. */
|
||||
void *addr;
|
||||
|
||||
switch (reg)
|
||||
|
|
|
@ -106,6 +106,7 @@ unw_handle_signal_frame (unw_cursor_t *cursor)
|
|||
HIDDEN void *
|
||||
x86_64_r_uc_addr (ucontext_t *uc, int reg)
|
||||
{
|
||||
/* NOTE: common_init() in init.h inlines these for fast path access. */
|
||||
void *addr;
|
||||
|
||||
switch (reg)
|
||||
|
|
|
@ -92,7 +92,7 @@ trace_cache_buckets (void)
|
|||
unw_tdep_frame_t *frames = mempool_alloc(&trace_frame_pool);
|
||||
size_t i;
|
||||
|
||||
if (likely (frames != 0))
|
||||
if (likely(frames != 0))
|
||||
for (i = 0; i < (1u << HASH_LOW_BITS); ++i)
|
||||
frames[i] = empty_frame;
|
||||
|
||||
|
@ -142,7 +142,7 @@ trace_cache_expand (unw_trace_cache_t *cache)
|
|||
old_size = (1u << cache->log_frame_vecs);
|
||||
new_size = cache->log_frame_vecs + 2;
|
||||
for (i = old_size; i < (1u << new_size); ++i)
|
||||
if (unlikely (! (cache->frames[i] = trace_cache_buckets())))
|
||||
if (unlikely(! (cache->frames[i] = trace_cache_buckets())))
|
||||
{
|
||||
Debug(5, "failed to expand cache to 2^%lu hash bucket sets\n", new_size);
|
||||
for (j = old_size; j < i; ++j)
|
||||
|
@ -237,10 +237,10 @@ trace_init_addr (unw_tdep_frame_t *f,
|
|||
d->loc[UNW_X86_64_RSP] = DWARF_REG_LOC (d, UNW_X86_64_RSP);
|
||||
c->frame_info = *f;
|
||||
|
||||
if (dwarf_put (d, d->loc[UNW_X86_64_RIP], rip) >= 0
|
||||
&& dwarf_put (d, d->loc[UNW_X86_64_RBP], rbp) >= 0
|
||||
&& dwarf_put (d, d->loc[UNW_X86_64_RSP], rsp) >= 0
|
||||
&& (ret = unw_step (cursor)) >= 0)
|
||||
if (likely(dwarf_put (d, d->loc[UNW_X86_64_RIP], rip) >= 0)
|
||||
&& likely(dwarf_put (d, d->loc[UNW_X86_64_RBP], rbp) >= 0)
|
||||
&& likely(dwarf_put (d, d->loc[UNW_X86_64_RSP], rsp) >= 0)
|
||||
&& likely((ret = unw_step (cursor)) >= 0))
|
||||
*f = c->frame_info;
|
||||
|
||||
/* If unw_step() stopped voluntarily, remember that, even if it
|
||||
|
@ -290,14 +290,14 @@ trace_lookup (unw_cursor_t *cursor,
|
|||
addr = frame->virtual_address;
|
||||
|
||||
/* Return if we found the address. */
|
||||
if (addr == rip)
|
||||
if (likely(addr == rip))
|
||||
{
|
||||
Debug (4, "found address after %ld steps\n", i);
|
||||
return frame;
|
||||
}
|
||||
|
||||
/* If slot is empty, reuse it. */
|
||||
if (! addr)
|
||||
if (likely(! addr))
|
||||
break;
|
||||
|
||||
/* Linear probe to next slot candidate, step = 1. */
|
||||
|
@ -310,9 +310,9 @@ trace_lookup (unw_cursor_t *cursor,
|
|||
it's free or collides. Note that hash expansion drops previous
|
||||
contents; further lookups will refill the hash. */
|
||||
Debug (4, "updating slot %lu after %ld steps, replacing 0x%lx\n", slot, i, addr);
|
||||
if (unlikely (addr || cache->used >= cache_size / 2))
|
||||
if (unlikely(addr || cache->used >= cache_size / 2))
|
||||
{
|
||||
if (unlikely (trace_cache_expand (cache) < 0))
|
||||
if (unlikely(trace_cache_expand (cache) < 0))
|
||||
return 0;
|
||||
|
||||
cache_size = 1u << (HASH_LOW_BITS + cache->log_frame_vecs);
|
||||
|
@ -404,7 +404,7 @@ tdep_trace (unw_cursor_t *cursor, void **buffer, int *size)
|
|||
int ret;
|
||||
|
||||
/* Check input parameters. */
|
||||
if (! cursor || ! buffer || ! size || (maxdepth = *size) <= 0)
|
||||
if (unlikely(! cursor || ! buffer || ! size || (maxdepth = *size) <= 0))
|
||||
return -UNW_EINVAL;
|
||||
|
||||
Debug (1, "begin ip 0x%lx cfa 0x%lx\n", d->ip, d->cfa);
|
||||
|
@ -415,7 +415,7 @@ tdep_trace (unw_cursor_t *cursor, void **buffer, int *size)
|
|||
/* Determine initial register values. */
|
||||
rip = d->ip;
|
||||
rsp = cfa = d->cfa;
|
||||
if ((ret = dwarf_get (d, d->loc[UNW_X86_64_RBP], &rbp)) < 0)
|
||||
if (unlikely((ret = dwarf_get (d, d->loc[UNW_X86_64_RBP], &rbp)) < 0))
|
||||
{
|
||||
Debug (1, "returning %d, rbp value not found\n", ret);
|
||||
*size = 0;
|
||||
|
@ -424,7 +424,7 @@ tdep_trace (unw_cursor_t *cursor, void **buffer, int *size)
|
|||
}
|
||||
|
||||
/* Get frame cache. */
|
||||
if (! (cache = trace_cache_get()))
|
||||
if (unlikely(! (cache = trace_cache_get())))
|
||||
{
|
||||
Debug (1, "returning %d, cannot get trace cache\n", -UNW_ENOMEM);
|
||||
*size = 0;
|
||||
|
@ -450,7 +450,7 @@ tdep_trace (unw_cursor_t *cursor, void **buffer, int *size)
|
|||
unw_tdep_frame_t *f = trace_lookup (cursor, cache, cfa, rip, rbp, rsp);
|
||||
|
||||
/* If we don't have information for this frame, give up. */
|
||||
if (! f)
|
||||
if (unlikely(! f))
|
||||
{
|
||||
ret = -UNW_ENOINFO;
|
||||
break;
|
||||
|
@ -481,9 +481,9 @@ tdep_trace (unw_cursor_t *cursor, void **buffer, int *size)
|
|||
case UNW_X86_64_FRAME_STANDARD:
|
||||
/* Advance standard traceable frame. */
|
||||
cfa = (f->cfa_reg_rsp ? rsp : rbp) + f->cfa_reg_offset;
|
||||
ret = dwarf_get (d, DWARF_MEM_LOC (d, cfa - 8), &rip);
|
||||
if (ret >= 0 && f->rbp_cfa_offset != -1)
|
||||
ret = dwarf_get (d, DWARF_MEM_LOC (d, cfa + f->rbp_cfa_offset), &rbp);
|
||||
ACCESS_MEM_FAST(ret, c->validate, d, cfa - 8, rip);
|
||||
if (likely(ret >= 0) && likely(f->rbp_cfa_offset != -1))
|
||||
ACCESS_MEM_FAST(ret, c->validate, d, cfa + f->rbp_cfa_offset, rbp);
|
||||
|
||||
/* Don't bother reading RSP from DWARF, CFA becomes new RSP. */
|
||||
rsp = cfa;
|
||||
|
@ -497,13 +497,12 @@ tdep_trace (unw_cursor_t *cursor, void **buffer, int *size)
|
|||
registers (ucontext) among other things. We know the info
|
||||
is stored at some unknown constant offset off inner frame's
|
||||
CFA. We determine the actual offset from DWARF unwind info. */
|
||||
d->use_prev_instr = 0;
|
||||
cfa = cfa + f->cfa_reg_offset;
|
||||
ret = dwarf_get (d, DWARF_MEM_LOC (d, cfa + f->rbp_cfa_offset + dRIP), &rip);
|
||||
if (ret >= 0)
|
||||
ret = dwarf_get (d, DWARF_MEM_LOC (d, cfa + f->rbp_cfa_offset), &rbp);
|
||||
if (ret >= 0)
|
||||
ret = dwarf_get (d, DWARF_MEM_LOC (d, cfa + f->rsp_cfa_offset), &rsp);
|
||||
ACCESS_MEM_FAST(ret, c->validate, d, cfa + f->rbp_cfa_offset + dRIP, rip);
|
||||
if (likely(ret >= 0))
|
||||
ACCESS_MEM_FAST(ret, c->validate, d, cfa + f->rbp_cfa_offset, rbp);
|
||||
if (likely(ret >= 0))
|
||||
ACCESS_MEM_FAST(ret, c->validate, d, cfa + f->rsp_cfa_offset, rsp);
|
||||
|
||||
/* Resume stack at signal restoration point. The stack is not
|
||||
necessarily continuous here, especially with sigaltstack(). */
|
||||
|
@ -524,8 +523,8 @@ tdep_trace (unw_cursor_t *cursor, void **buffer, int *size)
|
|||
Debug (4, "new cfa 0x%lx rip 0x%lx rsp 0x%lx rbp 0x%lx\n",
|
||||
cfa, rip, rsp, rbp);
|
||||
|
||||
/* If we failed on ended up somewhere bogus, stop. */
|
||||
if (ret < 0 || rip < 0x4000)
|
||||
/* If we failed or ended up somewhere bogus, stop. */
|
||||
if (unlikely(ret < 0 || rip < 0x4000))
|
||||
break;
|
||||
|
||||
/* Record this address in stack trace. We skipped the first address. */
|
||||
|
|
|
@ -27,28 +27,42 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
|||
|
||||
#include "unwind_i.h"
|
||||
|
||||
/* Avoid a trip to x86_64_r_uc_addr() for purely local initialisation. */
|
||||
#if defined UNW_LOCAL_ONLY && defined __linux
|
||||
# define REG_INIT_LOC(c, rlc, ruc) \
|
||||
DWARF_LOC ((unw_word_t) &c->uc->uc_mcontext.gregs[REG_ ## ruc], 0)
|
||||
|
||||
#elif defined UNW_LOCAL_ONLY && defined __FreeBSD__
|
||||
# define REG_INIT_LOC(c, rlc, ruc) \
|
||||
DWARF_LOC ((unw_word_t) &c->uc->uc_mcontext.mc_ ## rlc, 0)
|
||||
|
||||
#else
|
||||
# define REG_INIT_LOC(c, rlc, ruc) \
|
||||
DWARF_REG_LOC (&c->dwarf, UNW_X86_64_ ## ruc)
|
||||
#endif
|
||||
|
||||
static inline int
|
||||
common_init (struct cursor *c, unsigned use_prev_instr)
|
||||
{
|
||||
int ret;
|
||||
|
||||
c->dwarf.loc[RAX] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_RAX);
|
||||
c->dwarf.loc[RDX] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_RDX);
|
||||
c->dwarf.loc[RCX] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_RCX);
|
||||
c->dwarf.loc[RBX] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_RBX);
|
||||
c->dwarf.loc[RSI] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_RSI);
|
||||
c->dwarf.loc[RDI] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_RDI);
|
||||
c->dwarf.loc[RBP] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_RBP);
|
||||
c->dwarf.loc[RSP] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_RSP);
|
||||
c->dwarf.loc[R8] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_R8);
|
||||
c->dwarf.loc[R9] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_R9);
|
||||
c->dwarf.loc[R10] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_R10);
|
||||
c->dwarf.loc[R11] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_R11);
|
||||
c->dwarf.loc[R12] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_R12);
|
||||
c->dwarf.loc[R13] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_R13);
|
||||
c->dwarf.loc[R14] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_R14);
|
||||
c->dwarf.loc[R15] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_R15);
|
||||
c->dwarf.loc[RIP] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_RIP);
|
||||
c->dwarf.loc[RAX] = REG_INIT_LOC(c, rax, RAX);
|
||||
c->dwarf.loc[RDX] = REG_INIT_LOC(c, rdx, RDX);
|
||||
c->dwarf.loc[RCX] = REG_INIT_LOC(c, rcx, RCX);
|
||||
c->dwarf.loc[RBX] = REG_INIT_LOC(c, rbx, RBX);
|
||||
c->dwarf.loc[RSI] = REG_INIT_LOC(c, rsi, RSI);
|
||||
c->dwarf.loc[RDI] = REG_INIT_LOC(c, rdi, RDI);
|
||||
c->dwarf.loc[RBP] = REG_INIT_LOC(c, rbp, RBP);
|
||||
c->dwarf.loc[RSP] = REG_INIT_LOC(c, rsp, RSP);
|
||||
c->dwarf.loc[R8] = REG_INIT_LOC(c, r8, R8);
|
||||
c->dwarf.loc[R9] = REG_INIT_LOC(c, r9, R9);
|
||||
c->dwarf.loc[R10] = REG_INIT_LOC(c, r10, R10);
|
||||
c->dwarf.loc[R11] = REG_INIT_LOC(c, r11, R11);
|
||||
c->dwarf.loc[R12] = REG_INIT_LOC(c, r12, R12);
|
||||
c->dwarf.loc[R13] = REG_INIT_LOC(c, r13, R13);
|
||||
c->dwarf.loc[R14] = REG_INIT_LOC(c, r14, R14);
|
||||
c->dwarf.loc[R15] = REG_INIT_LOC(c, r15, R15);
|
||||
c->dwarf.loc[RIP] = REG_INIT_LOC(c, rip, RIP);
|
||||
|
||||
ret = dwarf_get (&c->dwarf, c->dwarf.loc[RIP], &c->dwarf.ip);
|
||||
if (ret < 0)
|
||||
|
|
|
@ -65,6 +65,18 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
|||
#define x86_64_r_uc_addr UNW_OBJ(r_uc_addr)
|
||||
#define x86_64_sigreturn UNW_OBJ(sigreturn)
|
||||
|
||||
/* Bypass calls to access_mem() when known to be safe. */
|
||||
#ifdef UNW_LOCAL_ONLY
|
||||
# undef ACCESS_MEM_FAST
|
||||
# define ACCESS_MEM_FAST(ret,validate,cur,addr,to) \
|
||||
do { \
|
||||
if (unlikely(validate)) \
|
||||
(ret) = dwarf_get ((cur), DWARF_MEM_LOC ((cur), (addr)), &(to)); \
|
||||
else \
|
||||
(ret) = 0, (to) = *(unw_word_t *)(addr); \
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
extern void x86_64_local_addr_space_init (void);
|
||||
extern int x86_64_local_resume (unw_addr_space_t as, unw_cursor_t *cursor,
|
||||
void *arg);
|
||||
|
|
Loading…
Add table
Reference in a new issue