From 93d8272481ed271ec64fe48accfe9084ae60e8c2 Mon Sep 17 00:00:00 2001 From: Filip Nyback Date: Wed, 30 Jul 2014 14:34:31 +0300 Subject: [PATCH] Add fast stack trace on AArch64. Port of the fast stack trace feature to AArch64. --- include/tdep-aarch64/libunwind_i.h | 32 +- src/Makefile.am | 5 +- src/aarch64/Gstash_frame.c | 89 +++++ src/aarch64/Gstep.c | 2 + src/aarch64/Gtrace.c | 548 +++++++++++++++++++++++++++++ src/aarch64/Lstash_frame.c | 5 + src/aarch64/Ltrace.c | 5 + src/aarch64/getcontext.S | 52 +++ src/aarch64/unwind_i.h | 19 + 9 files changed, 751 insertions(+), 6 deletions(-) create mode 100644 src/aarch64/Gstash_frame.c create mode 100644 src/aarch64/Gtrace.c create mode 100644 src/aarch64/Lstash_frame.c create mode 100644 src/aarch64/Ltrace.c create mode 100644 src/aarch64/getcontext.S diff --git a/include/tdep-aarch64/libunwind_i.h b/include/tdep-aarch64/libunwind_i.h index b162271c..9f0c0cc3 100644 --- a/include/tdep-aarch64/libunwind_i.h +++ b/include/tdep-aarch64/libunwind_i.h @@ -37,9 +37,25 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "mempool.h" #include "dwarf.h" +typedef enum + { + UNW_AARCH64_FRAME_STANDARD = -2, /* regular fp, sp +/- offset */ + UNW_AARCH64_FRAME_SIGRETURN = -1, /* special sigreturn frame */ + UNW_AARCH64_FRAME_OTHER = 0, /* not cacheable (special or unrecognised) */ + UNW_AARCH64_FRAME_GUESSED = 1 /* guessed it was regular, but not known */ + } +unw_tdep_frame_type_t; + typedef struct { - /* no aarch64-specific fast trace */ + uint64_t virtual_address; + int64_t frame_type : 2; /* unw_tdep_frame_type_t classification */ + int64_t last_frame : 1; /* non-zero if last frame in chain */ + int64_t cfa_reg_sp : 1; /* cfa dwarf base register is sp vs. fp */ + int64_t cfa_reg_offset : 30; /* cfa is at this offset from base register value */ + int64_t fp_cfa_offset : 30; /* fp saved at this offset from cfa (-1 = not saved) */ + int64_t lr_cfa_offset : 30; /* lr saved at this offset from cfa (-1 = not saved) */ + int64_t sp_cfa_offset : 30; /* sp saved at this offset from cfa (-1 = not saved) */ } unw_tdep_frame_t; @@ -76,6 +92,9 @@ struct unw_addr_space struct cursor { struct dwarf_cursor dwarf; /* must be first */ + + unw_tdep_frame_t frame_info; /* quick tracing assist info */ + enum { AARCH64_SCF_NONE, @@ -85,6 +104,7 @@ struct cursor unw_word_t sigcontext_addr; unw_word_t sigcontext_sp; unw_word_t sigcontext_pc; + int validate; }; #define DWARF_GET_LOC(l) ((l).val) @@ -240,7 +260,7 @@ dwarf_put (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t val) -#define tdep_getcontext_trace unw_getcontext +#define tdep_getcontext_trace UNW_ARCH_OBJ(getcontext_trace) #define tdep_init_done UNW_OBJ(init_done) #define tdep_init UNW_OBJ(init) /* Platforms that support UNW_INFO_FORMAT_TABLE need to define @@ -254,8 +274,8 @@ dwarf_put (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t val) #define tdep_fetch_frame(c,ip,n) do {} while(0) #define tdep_cache_frame(c,rs) do {} while(0) #define tdep_reuse_frame(c,rs) do {} while(0) -#define tdep_stash_frame(c,rs) do {} while(0) -#define tdep_trace(cur,addr,n) (-UNW_ENOINFO) +#define tdep_stash_frame UNW_OBJ(tdep_stash_frame) +#define tdep_trace UNW_OBJ(tdep_trace) #ifdef UNW_LOCAL_ONLY # define tdep_find_proc_info(c,ip,n) \ @@ -290,5 +310,9 @@ extern int tdep_access_reg (struct cursor *c, unw_regnum_t reg, unw_word_t *valp, int write); extern int tdep_access_fpreg (struct cursor *c, unw_regnum_t reg, unw_fpreg_t *valp, int write); +extern int tdep_trace (unw_cursor_t *cursor, void 
**addresses, int *n); +extern void tdep_stash_frame (struct dwarf_cursor *c, + struct dwarf_reg_state *rs); +extern int tdep_getcontext_trace (unw_tdep_context_t *); #endif /* AARCH64_LIBUNWIND_I_H */ diff --git a/src/Makefile.am b/src/Makefile.am index efa976b2..451b8dbd 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -198,7 +198,8 @@ libunwind_la_SOURCES_aarch64 = $(libunwind_la_SOURCES_aarch64_common) \ aarch64/Lget_save_loc.c aarch64/Lglobal.c aarch64/Linit.c \ aarch64/Linit_local.c aarch64/Linit_remote.c \ aarch64/Lis_signal_frame.c aarch64/Lregs.c aarch64/Lresume.c \ - aarch64/Lstep.c + aarch64/Lstash_frame.c aarch64/Lstep.c aarch64/Ltrace.c \ + aarch64/getcontext.S libunwind_aarch64_la_SOURCES_aarch64 = $(libunwind_la_SOURCES_aarch64_common) \ $(libunwind_la_SOURCES_generic) \ @@ -206,7 +207,7 @@ libunwind_aarch64_la_SOURCES_aarch64 = $(libunwind_la_SOURCES_aarch64_common) \ aarch64/Gget_save_loc.c aarch64/Gglobal.c aarch64/Ginit.c \ aarch64/Ginit_local.c aarch64/Ginit_remote.c \ aarch64/Gis_signal_frame.c aarch64/Gregs.c aarch64/Gresume.c \ - aarch64/Gstep.c + aarch64/Gstash_frame.c aarch64/Gstep.c aarch64/Gtrace.c # The list of files that go into libunwind and libunwind-arm: noinst_HEADERS += arm/init.h arm/offsets.h arm/unwind_i.h diff --git a/src/aarch64/Gstash_frame.c b/src/aarch64/Gstash_frame.c new file mode 100644 index 00000000..da1ff2cb --- /dev/null +++ b/src/aarch64/Gstash_frame.c @@ -0,0 +1,89 @@ +/* libunwind - a platform-independent unwind library + Copyright (C) 2010, 2011 by FERMI NATIONAL ACCELERATOR LABORATORY + Copyright (C) 2014 CERN and Aalto University + Contributed by Filip Nyback + +This file is part of libunwind. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ + +#include "unwind_i.h" + +HIDDEN void +tdep_stash_frame (struct dwarf_cursor *d, struct dwarf_reg_state *rs) +{ + struct cursor *c = (struct cursor *) dwarf_to_cursor (d); + unw_tdep_frame_t *f = &c->frame_info; + + Debug (4, "ip=0x%lx cfa=0x%lx type %d cfa [where=%d val=%ld] cfaoff=%ld" + " ra=0x%lx fp [where=%d val=%ld @0x%lx] lr [where=%d val=%ld @0x%lx] " + "sp [where=%d val=%ld @0x%lx]\n", + d->ip, d->cfa, f->frame_type, + rs->reg[DWARF_CFA_REG_COLUMN].where, + rs->reg[DWARF_CFA_REG_COLUMN].val, + rs->reg[DWARF_CFA_OFF_COLUMN].val, + DWARF_GET_LOC(d->loc[d->ret_addr_column]), + rs->reg[FP].where, rs->reg[FP].val, DWARF_GET_LOC(d->loc[FP]), + rs->reg[LR].where, rs->reg[LR].val, DWARF_GET_LOC(d->loc[LR]), + rs->reg[SP].where, rs->reg[SP].val, DWARF_GET_LOC(d->loc[SP])); + + /* A standard frame is defined as: + - CFA is register-relative offset off FP or SP; + - Return address is saved in LR; + - FP is unsaved or saved at CFA+offset, offset != -1; + - LR is unsaved or saved at CFA+offset, offset != -1; + - SP is unsaved or saved at CFA+offset, offset != -1. */ + if (f->frame_type == UNW_AARCH64_FRAME_OTHER + && (rs->reg[DWARF_CFA_REG_COLUMN].where == DWARF_WHERE_REG) + && (rs->reg[DWARF_CFA_REG_COLUMN].val == FP + || rs->reg[DWARF_CFA_REG_COLUMN].val == SP) + && labs(rs->reg[DWARF_CFA_OFF_COLUMN].val) < (1 << 29) + && d->ret_addr_column == LR + && (rs->reg[FP].where == DWARF_WHERE_UNDEF + || rs->reg[FP].where == DWARF_WHERE_SAME + || (rs->reg[FP].where == DWARF_WHERE_CFAREL + && labs(rs->reg[FP].val) < (1 << 29) + && rs->reg[FP].val+1 != 0)) + && (rs->reg[LR].where == DWARF_WHERE_UNDEF + || rs->reg[LR].where == DWARF_WHERE_SAME + || (rs->reg[LR].where == DWARF_WHERE_CFAREL + && labs(rs->reg[LR].val) < (1 << 29) + && rs->reg[LR].val+1 != 0)) + && (rs->reg[SP].where == DWARF_WHERE_UNDEF + || rs->reg[SP].where == DWARF_WHERE_SAME + || (rs->reg[SP].where == DWARF_WHERE_CFAREL + && labs(rs->reg[SP].val) < (1 << 29) + && rs->reg[SP].val+1 != 0))) + { + /* Save information for a standard frame. */ + f->frame_type = UNW_AARCH64_FRAME_STANDARD; + f->cfa_reg_sp = (rs->reg[DWARF_CFA_REG_COLUMN].val == SP); + f->cfa_reg_offset = rs->reg[DWARF_CFA_OFF_COLUMN].val; + if (rs->reg[FP].where == DWARF_WHERE_CFAREL) + f->fp_cfa_offset = rs->reg[FP].val; + if (rs->reg[LR].where == DWARF_WHERE_CFAREL) + f->lr_cfa_offset = rs->reg[LR].val; + if (rs->reg[SP].where == DWARF_WHERE_CFAREL) + f->sp_cfa_offset = rs->reg[SP].val; + Debug (4, " standard frame\n"); + } + else + Debug (4, " unusual frame\n"); +} diff --git a/src/aarch64/Gstep.c b/src/aarch64/Gstep.c index 4aa15d1b..61891f82 100644 --- a/src/aarch64/Gstep.c +++ b/src/aarch64/Gstep.c @@ -55,6 +55,8 @@ unw_handle_signal_frame (unw_cursor_t *cursor) return -UNW_EUNSPEC; c->sigcontext_addr = sc_addr; + c->frame_info.frame_type = UNW_AARCH64_FRAME_SIGRETURN; + c->frame_info.cfa_reg_offset = sc_addr - sp_addr; /* Update the dwarf cursor. Set the location of the registers to the corresponding addresses of the diff --git a/src/aarch64/Gtrace.c b/src/aarch64/Gtrace.c new file mode 100644 index 00000000..c64cc764 --- /dev/null +++ b/src/aarch64/Gtrace.c @@ -0,0 +1,548 @@ +/* libunwind - a platform-independent unwind library + Copyright (C) 2010, 2011 by FERMI NATIONAL ACCELERATOR LABORATORY + Copyright (C) 2014 CERN and Aalto University + Contributed by Filip Nyback + +This file is part of libunwind. 
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "unwind_i.h"
+#include "offsets.h"
+#include <signal.h>
+#include <limits.h>
+
+#pragma weak pthread_once
+#pragma weak pthread_key_create
+#pragma weak pthread_getspecific
+#pragma weak pthread_setspecific
+
+/* Initial hash table size. Table expands by 2 bits (times four). */
+#define HASH_MIN_BITS 14
+
+typedef struct
+{
+  unw_tdep_frame_t *frames;
+  size_t log_size;
+  size_t used;
+  size_t dtor_count;  /* Counts how many times our destructor has already
+                         been called. */
+} unw_trace_cache_t;
+
+static const unw_tdep_frame_t empty_frame = { 0, UNW_AARCH64_FRAME_OTHER, -1, -1, 0, -1, -1, -1 };
+static define_lock (trace_init_lock);
+static pthread_once_t trace_cache_once = PTHREAD_ONCE_INIT;
+static sig_atomic_t trace_cache_once_happen;
+static pthread_key_t trace_cache_key;
+static struct mempool trace_cache_pool;
+static __thread unw_trace_cache_t *tls_cache;
+static __thread int tls_cache_destroyed;
+
+/* Free memory for a thread's trace cache. */
+static void
+trace_cache_free (void *arg)
+{
+  unw_trace_cache_t *cache = arg;
+  if (++cache->dtor_count < PTHREAD_DESTRUCTOR_ITERATIONS)
+  {
+    /* Not yet our turn to get destroyed. Re-install ourselves into the key. */
+    pthread_setspecific(trace_cache_key, cache);
+    Debug(5, "delayed freeing cache %p (%zx to go)\n", cache,
+          PTHREAD_DESTRUCTOR_ITERATIONS - cache->dtor_count);
+    return;
+  }
+  tls_cache_destroyed = 1;
+  tls_cache = NULL;
+  munmap (cache->frames, (1u << cache->log_size) * sizeof(unw_tdep_frame_t));
+  mempool_free (&trace_cache_pool, cache);
+  Debug(5, "freed cache %p\n", cache);
+}
+
+/* Initialise frame tracing for threaded use. */
+static void
+trace_cache_init_once (void)
+{
+  pthread_key_create (&trace_cache_key, &trace_cache_free);
+  mempool_init (&trace_cache_pool, sizeof (unw_trace_cache_t), 0);
+  trace_cache_once_happen = 1;
+}
+
+static unw_tdep_frame_t *
+trace_cache_buckets (size_t n)
+{
+  unw_tdep_frame_t *frames;
+  size_t i;
+
+  GET_MEMORY(frames, n * sizeof (unw_tdep_frame_t));
+  if (likely(frames != NULL))
+    for (i = 0; i < n; ++i)
+      frames[i] = empty_frame;
+
+  return frames;
+}
+
+/* Allocate and initialise hash table for frame cache lookups.
+   Returns the cache initialised with (1u << HASH_MIN_BITS) hash
+   buckets, or NULL if there was a memory allocation problem. */
+static unw_trace_cache_t *
+trace_cache_create (void)
+{
+  unw_trace_cache_t *cache;
+
+  if (tls_cache_destroyed)
+  {
+    /* The current thread is in the process of exiting.
Don't recreate + cache, as we wouldn't have another chance to free it. */ + Debug(5, "refusing to reallocate cache: " + "thread-locals are being deallocated\n"); + return NULL; + } + + if (! (cache = mempool_alloc(&trace_cache_pool))) + { + Debug(5, "failed to allocate cache\n"); + return NULL; + } + + if (! (cache->frames = trace_cache_buckets(1u << HASH_MIN_BITS))) + { + Debug(5, "failed to allocate buckets\n"); + mempool_free(&trace_cache_pool, cache); + return NULL; + } + + cache->log_size = HASH_MIN_BITS; + cache->used = 0; + cache->dtor_count = 0; + tls_cache_destroyed = 0; /* Paranoia: should already be 0. */ + Debug(5, "allocated cache %p\n", cache); + return cache; +} + +/* Expand the hash table in the frame cache if possible. This always + quadruples the hash size, and clears all previous frame entries. */ +static int +trace_cache_expand (unw_trace_cache_t *cache) +{ + size_t old_size = (1u << cache->log_size); + size_t new_log_size = cache->log_size + 2; + unw_tdep_frame_t *new_frames = trace_cache_buckets (1u << new_log_size); + + if (unlikely(! new_frames)) + { + Debug(5, "failed to expand cache to 2^%lu buckets\n", new_log_size); + return -UNW_ENOMEM; + } + + Debug(5, "expanded cache from 2^%lu to 2^%lu buckets\n", cache->log_size, new_log_size); + munmap(cache->frames, old_size * sizeof(unw_tdep_frame_t)); + cache->frames = new_frames; + cache->log_size = new_log_size; + cache->used = 0; + return 0; +} + +static unw_trace_cache_t * +trace_cache_get_unthreaded (void) +{ + unw_trace_cache_t *cache; + intrmask_t saved_mask; + static unw_trace_cache_t *global_cache = NULL; + lock_acquire (&trace_init_lock, saved_mask); + if (! global_cache) + { + mempool_init (&trace_cache_pool, sizeof (unw_trace_cache_t), 0); + global_cache = trace_cache_create (); + } + cache = global_cache; + lock_release (&trace_init_lock, saved_mask); + Debug(5, "using cache %p\n", cache); + return cache; +} + +/* Get the frame cache for the current thread. Create it if there is none. */ +static unw_trace_cache_t * +trace_cache_get (void) +{ + unw_trace_cache_t *cache; + if (likely (pthread_once != NULL)) + { + pthread_once(&trace_cache_once, &trace_cache_init_once); + if (!trace_cache_once_happen) + { + return trace_cache_get_unthreaded(); + } + if (! (cache = tls_cache)) + { + cache = trace_cache_create(); + pthread_setspecific(trace_cache_key, cache); + tls_cache = cache; + } + Debug(5, "using cache %p\n", cache); + return cache; + } + else + { + return trace_cache_get_unthreaded(); + } +} + +/* Initialise frame properties for address cache slot F at address + PC using current CFA, FP and SP values. Modifies CURSOR to + that location, performs one unw_step(), and fills F with what + was discovered about the location. Returns F. + + FIXME: This probably should tell DWARF handling to never evaluate + or use registers other than FP, SP and PC in case there is + highly unusual unwind info which uses these creatively. */ +static unw_tdep_frame_t * +trace_init_addr (unw_tdep_frame_t *f, + unw_cursor_t *cursor, + unw_word_t cfa, + unw_word_t pc, + unw_word_t fp, + unw_word_t sp) +{ + struct cursor *c = (struct cursor *) cursor; + struct dwarf_cursor *d = &c->dwarf; + int ret = -UNW_EINVAL; + + /* Initialise frame properties: unknown, not last. 
*/ + f->virtual_address = pc; + f->frame_type = UNW_AARCH64_FRAME_OTHER; + f->last_frame = 0; + f->cfa_reg_sp = -1; + f->cfa_reg_offset = 0; + f->fp_cfa_offset = -1; + f->lr_cfa_offset = -1; + f->sp_cfa_offset = -1; + + /* Reinitialise cursor to this instruction - but undo next/prev PC + adjustment because unw_step will redo it - and force PC, FP and + SP into register locations (=~ ucontext we keep), then set + their desired values. Then perform the step. */ + d->ip = pc + d->use_prev_instr; + d->cfa = cfa; + d->loc[UNW_AARCH64_X29] = DWARF_REG_LOC (d, UNW_AARCH64_X29); + d->loc[UNW_AARCH64_SP] = DWARF_REG_LOC (d, UNW_AARCH64_SP); + d->loc[UNW_AARCH64_PC] = DWARF_REG_LOC (d, UNW_AARCH64_PC); + c->frame_info = *f; + + if (likely(dwarf_put (d, d->loc[UNW_AARCH64_X29], fp) >= 0) + && likely(dwarf_put (d, d->loc[UNW_AARCH64_SP], sp) >= 0) + && likely(dwarf_put (d, d->loc[UNW_AARCH64_PC], pc) >= 0) + && likely((ret = unw_step (cursor)) >= 0)) + *f = c->frame_info; + + /* If unw_step() stopped voluntarily, remember that, even if it + otherwise could not determine anything useful. This avoids + failing trace if we hit frames without unwind info, which is + common for the outermost frame (CRT stuff) on many systems. + This avoids failing trace in very common circumstances; failing + to unw_step() loop wouldn't produce any better result. */ + if (ret == 0) + f->last_frame = -1; + + Debug (3, "frame va %lx type %d last %d cfa %s+%d fp @ cfa%+d lr @ cfa%+d sp @ cfa%+d\n", + f->virtual_address, f->frame_type, f->last_frame, + f->cfa_reg_sp ? "sp" : "fp", f->cfa_reg_offset, + f->fp_cfa_offset, f->lr_cfa_offset, f->sp_cfa_offset); + + return f; +} + +/* Look up and if necessary fill in frame attributes for address PC + in CACHE using current CFA, FP and SP values. Uses CURSOR to + perform any unwind steps necessary to fill the cache. Returns the + frame cache slot which describes RIP. */ +static unw_tdep_frame_t * +trace_lookup (unw_cursor_t *cursor, + unw_trace_cache_t *cache, + unw_word_t cfa, + unw_word_t pc, + unw_word_t fp, + unw_word_t sp) +{ + /* First look up for previously cached information using cache as + linear probing hash table with probe step of 1. Majority of + lookups should be completed within few steps, but it is very + important the hash table does not fill up, or performance falls + off the cliff. */ + uint64_t i, addr; + uint64_t cache_size = 1u << cache->log_size; + uint64_t slot = ((pc * 0x9e3779b97f4a7c16) >> 43) & (cache_size-1); + unw_tdep_frame_t *frame; + + for (i = 0; i < 16; ++i) + { + frame = &cache->frames[slot]; + addr = frame->virtual_address; + + /* Return if we found the address. */ + if (likely(addr == pc)) + { + Debug (4, "found address after %ld steps\n", i); + return frame; + } + + /* If slot is empty, reuse it. */ + if (likely(! addr)) + break; + + /* Linear probe to next slot candidate, step = 1. */ + if (++slot >= cache_size) + slot -= cache_size; + } + + /* If we collided after 16 steps, or if the hash is more than half + full, force the hash to expand. Fill the selected slot, whether + it's free or collides. Note that hash expansion drops previous + contents; further lookups will refill the hash. 
*/
+  Debug (4, "updating slot %lu after %ld steps, replacing 0x%lx\n", slot, i, addr);
+  if (unlikely(addr || cache->used >= cache_size / 2))
+  {
+    if (unlikely(trace_cache_expand (cache) < 0))
+      return NULL;
+
+    cache_size = 1u << cache->log_size;
+    slot = ((pc * 0x9e3779b97f4a7c16) >> 43) & (cache_size-1);
+    frame = &cache->frames[slot];
+    addr = frame->virtual_address;
+  }
+
+  if (! addr)
+    ++cache->used;
+
+  return trace_init_addr (frame, cursor, cfa, pc, fp, sp);
+}
+
+/* Fast stack backtrace for AArch64.
+
+   This is used by the backtrace() implementation to accelerate frequent
+   queries for the current stack, without any desire to unwind. It fills
+   BUFFER with the call tree from CURSOR upwards for at most SIZE
+   stack levels. The first frame, backtrace itself, is omitted. When
+   called, SIZE should give the maximum number of entries that can be
+   stored into BUFFER. Uses an internal thread-specific cache to
+   accelerate queries.
+
+   The caller should fall back to a unw_step() loop if this function
+   fails by returning -UNW_ESTOPUNWIND, meaning the routine hit a
+   stack frame that is too complex to be traced in the fast path.
+
+   This function is tuned for clients which only need to walk the
+   stack to get the call tree as fast as possible but without any
+   other details, for example profilers sampling the stack thousands
+   to millions of times per second. The routine handles the most
+   common AArch64 ABI stack layouts: CFA is FP or SP plus/minus
+   constant offset, return address is in LR, and FP, LR and SP are
+   either unchanged or saved on stack at constant offset from the CFA;
+   the signal return frame; and frames without unwind info provided
+   they are at the outermost (final) frame or can conservatively be
+   assumed to be frame-pointer based.
+
+   Any other stack layout will cause the routine to give up. There
+   are only a handful of relatively rarely used functions which do
+   not have a stack in the standard form: vfork, longjmp, setcontext
+   and _dl_runtime_profile on common Linux systems, for example.
+
+   On success BUFFER and *SIZE reflect the trace progress up to *SIZE
+   stack levels or the outermost frame, whichever is less. It may
+   stop short of the outermost frame if the unw_step() loop would also
+   do so, e.g. if there is no more unwind information; this is not
+   reported as an error.
+
+   The function returns a negative value for errors, -UNW_ESTOPUNWIND
+   if tracing stopped because of unusual frame unwind info. The
+   BUFFER and *SIZE reflect tracing progress up to the error frame.
+
+   Callers of this function would normally look like this:
+
+     unw_cursor_t cur;
+     unw_context_t ctx;
+     void *addrs[128];
+     int depth = 128;
+     int ret;
+
+     unw_getcontext(&ctx);
+     unw_init_local(&cur, &ctx);
+     if ((ret = unw_tdep_trace(&cur, addrs, &depth)) < 0)
+       {
+         depth = 0;
+         unw_getcontext(&ctx);
+         unw_init_local(&cur, &ctx);
+         while ((ret = unw_step(&cur)) > 0 && depth < 128)
+           {
+             unw_word_t ip;
+             unw_get_reg(&cur, UNW_REG_IP, &ip);
+             addrs[depth++] = (void *) ip;
+           }
+       }
+*/
+HIDDEN int
+tdep_trace (unw_cursor_t *cursor, void **buffer, int *size)
+{
+  struct cursor *c = (struct cursor *) cursor;
+  struct dwarf_cursor *d = &c->dwarf;
+  unw_trace_cache_t *cache;
+  unw_word_t fp, sp, pc, cfa, lr;
+  int maxdepth = 0;
+  int depth = 0;
+  int ret;
+
+  /* Check input parameters. */
+  if (unlikely(! cursor || ! buffer || ! size || (maxdepth = *size) <= 0))
+    return -UNW_EINVAL;
+
+  Debug (1, "begin ip 0x%lx cfa 0x%lx\n", d->ip, d->cfa);
+
+  /* Tell core dwarf routines to call back to us.
*/ + d->stash_frames = 1; + + /* Determine initial register values. These are direct access safe + because we know they come from the initial machine context. */ + pc = d->ip; + sp = cfa = d->cfa; + ACCESS_MEM_FAST(ret, 0, d, DWARF_GET_LOC(d->loc[UNW_AARCH64_X29]), fp); + assert(ret == 0); + lr = 0; + + /* Get frame cache. */ + if (unlikely(! (cache = trace_cache_get()))) + { + Debug (1, "returning %d, cannot get trace cache\n", -UNW_ENOMEM); + *size = 0; + d->stash_frames = 0; + return -UNW_ENOMEM; + } + + /* Trace the stack upwards, starting from current RIP. Adjust + the RIP address for previous/next instruction as the main + unwinding logic would also do. We undo this before calling + back into unw_step(). */ + while (depth < maxdepth) + { + pc -= d->use_prev_instr; + Debug (2, "depth %d cfa 0x%lx pc 0x%lx sp 0x%lx fp 0x%lx\n", + depth, cfa, pc, sp, fp); + + /* See if we have this address cached. If not, evaluate enough of + the dwarf unwind information to fill the cache line data, or to + decide this frame cannot be handled in fast trace mode. We + cache negative results too to prevent unnecessary dwarf parsing + for common failures. */ + unw_tdep_frame_t *f = trace_lookup (cursor, cache, cfa, pc, fp, sp); + + /* If we don't have information for this frame, give up. */ + if (unlikely(! f)) + { + ret = -UNW_ENOINFO; + break; + } + + Debug (3, "frame va %lx type %d last %d cfa %s+%d fp @ cfa%+d lr @ cfa%+d sp @ cfa%+d\n", + f->virtual_address, f->frame_type, f->last_frame, + f->cfa_reg_sp ? "sp" : "fp", f->cfa_reg_offset, + f->fp_cfa_offset, f->lr_cfa_offset, f->sp_cfa_offset); + + assert (f->virtual_address == pc); + + /* Stop if this was the last frame. In particular don't evaluate + new register values as it may not be safe - we don't normally + run with full validation on, and do not want to - and there's + enough bad unwind info floating around that we need to trust + what unw_step() previously said, in potentially bogus frames. */ + if (f->last_frame) + break; + + /* Evaluate CFA and registers for the next frame. */ + switch (f->frame_type) + { + case UNW_AARCH64_FRAME_GUESSED: + /* Fall thru to standard processing after forcing validation. */ + c->validate = 1; + + case UNW_AARCH64_FRAME_STANDARD: + /* Advance standard traceable frame. */ + cfa = (f->cfa_reg_sp ? sp : fp) + f->cfa_reg_offset; + if (likely(f->lr_cfa_offset != -1)) + ACCESS_MEM_FAST(ret, c->validate, d, cfa + f->lr_cfa_offset, pc); + else if (lr != 0) + { + /* Use the saved link register as the new pc. */ + pc = lr; + lr = 0; + } + if (likely(ret >= 0) && likely(f->fp_cfa_offset != -1)) + ACCESS_MEM_FAST(ret, c->validate, d, cfa + f->fp_cfa_offset, fp); + + /* Don't bother reading SP from DWARF, CFA becomes new SP. */ + sp = cfa; + + /* Next frame needs to back up for unwind info lookup. */ + d->use_prev_instr = 1; + break; + + case UNW_AARCH64_FRAME_SIGRETURN: + cfa = cfa + f->cfa_reg_offset; /* cfa now points to ucontext_t. */ + + ACCESS_MEM_FAST(ret, c->validate, d, cfa + LINUX_SC_PC_OFF, pc); + if (likely(ret >= 0)) + ACCESS_MEM_FAST(ret, c->validate, d, cfa + LINUX_SC_X29_OFF, fp); + if (likely(ret >= 0)) + ACCESS_MEM_FAST(ret, c->validate, d, cfa + LINUX_SC_SP_OFF, sp); + /* Save the link register here in case we end up in a function that + doesn't save the link register in the prologue, e.g. kill. */ + if (likely(ret >= 0)) + ACCESS_MEM_FAST(ret, c->validate, d, cfa + LINUX_SC_X30_OFF, lr); + + /* Resume stack at signal restoration point. 
The stack is not
+         necessarily continuous here, especially with sigaltstack(). */
+      cfa = sp;
+
+      /* Next frame should not back up. */
+      d->use_prev_instr = 0;
+      break;
+
+    default:
+      /* We cannot trace through this frame, give up and tell the
+         caller we had to stop. Data collected so far may still be
+         useful to the caller, so let it know how far we got. */
+      ret = -UNW_ESTOPUNWIND;
+      break;
+    }
+
+    Debug (4, "new cfa 0x%lx pc 0x%lx sp 0x%lx fp 0x%lx\n",
+           cfa, pc, sp, fp);
+
+    /* If we failed or ended up somewhere bogus, stop. */
+    if (unlikely(ret < 0 || pc < 0x4000))
+      break;
+
+    /* Record this address in stack trace. We skipped the first address. */
+    buffer[depth++] = (void *) (pc - d->use_prev_instr);
+  }
+
+#if UNW_DEBUG
+  Debug (1, "returning %d, depth %d\n", ret, depth);
+#endif
+  *size = depth;
+  return ret;
+}
diff --git a/src/aarch64/Lstash_frame.c b/src/aarch64/Lstash_frame.c
new file mode 100644
index 00000000..77587803
--- /dev/null
+++ b/src/aarch64/Lstash_frame.c
@@ -0,0 +1,5 @@
+#define UNW_LOCAL_ONLY
+#include <libunwind.h>
+#if defined(UNW_LOCAL_ONLY) && !defined(UNW_REMOTE_ONLY)
+#include "Gstash_frame.c"
+#endif
diff --git a/src/aarch64/Ltrace.c b/src/aarch64/Ltrace.c
new file mode 100644
index 00000000..fcd3f239
--- /dev/null
+++ b/src/aarch64/Ltrace.c
@@ -0,0 +1,5 @@
+#define UNW_LOCAL_ONLY
+#include <libunwind.h>
+#if defined(UNW_LOCAL_ONLY) && !defined(UNW_REMOTE_ONLY)
+#include "Gtrace.c"
+#endif
diff --git a/src/aarch64/getcontext.S b/src/aarch64/getcontext.S
new file mode 100644
index 00000000..25ed5b66
--- /dev/null
+++ b/src/aarch64/getcontext.S
@@ -0,0 +1,52 @@
+/* libunwind - a platform-independent unwind library
+   Copyright (C) 2008 Google, Inc
+   Contributed by Paul Pluzhnikov
+   Copyright (C) 2010 Konstantin Belousov
+
+This file is part of libunwind.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "offsets.h"
+
+/* int _Uaarch64_getcontext_trace (unw_tdep_context_t *ucp)
+
+   Saves limited machine context in UCP necessary for fast trace. If fast
+   trace fails, the caller will have to get the full context.
+*/
+
+        .global _Uaarch64_getcontext_trace
+        .hidden _Uaarch64_getcontext_trace
+        .type _Uaarch64_getcontext_trace, @function
+_Uaarch64_getcontext_trace:
+        .cfi_startproc
+
+        /* Save only FP, SP, PC - exclude this call. */
+        str x29, [x0, #(LINUX_UC_MCONTEXT_OFF + LINUX_SC_X29_OFF)]
+        mov x9, sp
+        str x9, [x0, #(LINUX_UC_MCONTEXT_OFF + LINUX_SC_SP_OFF)]
+        str x30, [x0, #(LINUX_UC_MCONTEXT_OFF + LINUX_SC_PC_OFF)]
+
+        ret
+        .cfi_endproc
+        .size _Uaarch64_getcontext_trace, . - _Uaarch64_getcontext_trace
+
+  /* We do not need executable stack. */
+  .section .note.GNU-stack,"",@progbits
diff --git a/src/aarch64/unwind_i.h b/src/aarch64/unwind_i.h
index e4947a3c..2bea8f3d 100644
--- a/src/aarch64/unwind_i.h
+++ b/src/aarch64/unwind_i.h
@@ -32,6 +32,13 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
 #include "libunwind_i.h"
 
+/* DWARF column numbers for AArch64: */
+#define X29 29
+#define FP 29
+#define X30 30
+#define LR 30
+#define SP 31
+
 #define aarch64_lock UNW_OBJ(lock)
 #define aarch64_local_resume UNW_OBJ(local_resume)
 #define aarch64_local_addr_space_init UNW_OBJ(local_addr_space_init)
@@ -40,4 +47,16 @@ extern void aarch64_local_addr_space_init (void);
 extern int aarch64_local_resume (unw_addr_space_t as, unw_cursor_t *cursor,
                                  void *arg);
 
+/* By-pass calls to access_mem() when known to be safe. */
+#ifdef UNW_LOCAL_ONLY
+# undef ACCESS_MEM_FAST
+# define ACCESS_MEM_FAST(ret,validate,cur,addr,to) \
+  do { \
+    if (unlikely(validate)) \
+      (ret) = dwarf_get ((cur), DWARF_MEM_LOC ((cur), (addr)), &(to)); \
+    else \
+      (ret) = 0, (to) = *(unw_word_t *)(addr); \
+  } while (0)
+#endif
+
 #endif /* unwind_i_h */
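
---
Note for reviewers (not part of the patch): in local-only builds the public entry point unw_backtrace() is what exercises the fast path added here, attempting tdep_trace() first and falling back to the generic unw_step() loop on failure. Below is a minimal caller sketch assuming only the documented local-only API; the helper name collect_trace is made up for illustration, and the manual fallback loop simply mirrors the usage comment in Gtrace.c above.

    /* Illustrative only -- not part of the patch.  Shows the two ways a
       client ends up on the fast path: via unw_backtrace(), or by walking
       the cursor manually when the fast trace is not usable. */
    #define UNW_LOCAL_ONLY
    #include <libunwind.h>
    #include <stdio.h>

    /* Hypothetical helper: fill addrs[] with up to max return addresses. */
    static int collect_trace (void *addrs[], int max)
    {
      /* unw_backtrace() internally tries the fast tdep_trace() path and
         falls back to a full unw_step() walk on its own. */
      int n = unw_backtrace (addrs, max);
      if (n > 0)
        return n;

      /* Manual fallback: the plain step loop, as in the Gtrace.c comment. */
      unw_context_t ctx;
      unw_cursor_t cur;
      int depth = 0;

      unw_getcontext (&ctx);
      unw_init_local (&cur, &ctx);
      while (depth < max && unw_step (&cur) > 0)
        {
          unw_word_t ip;
          unw_get_reg (&cur, UNW_REG_IP, &ip);
          addrs[depth++] = (void *) ip;
        }
      return depth;
    }

    int main (void)
    {
      void *addrs[64];
      int i, n = collect_trace (addrs, 64);
      for (i = 0; i < n; ++i)
        printf ("#%-2d %p\n", i, addrs[i]);
      return 0;
    }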
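Also not part of the patch: a small standalone illustration of two internal details reviewers may want to sanity-check. The labs(...) < (1 << 29) guards in Gstash_frame.c exist because the cfa/fp/lr/sp offsets are stored in 30-bit signed bitfields of unw_tdep_frame_t, and trace_lookup() picks a hash bucket from the PC with a fixed 64-bit multiplier. The helper names fits_frame_offset and hash_slot are invented for the sketch; the constants mirror the patch.

    /* Illustrative only -- not part of the patch. */
    #include <stdio.h>
    #include <stdint.h>
    #include <inttypes.h>

    /* A cacheable offset must fit a 30-bit signed bitfield; the patch uses
       the slightly stricter |off| < 2^29 test, reproduced here. */
    static int fits_frame_offset (int64_t off)
    {
      return off > -(1 << 29) && off < (1 << 29);
    }

    /* Mirrors trace_lookup(): slot = ((pc * K) >> 43) & (2^log_size - 1). */
    static uint64_t hash_slot (uint64_t pc, unsigned log_size)
    {
      return ((pc * UINT64_C (0x9e3779b97f4a7c16)) >> 43)
             & ((1u << log_size) - 1);
    }

    int main (void)
    {
      printf ("offset 4096 cacheable: %d\n", fits_frame_offset (4096));
      printf ("offset 2^30 cacheable: %d\n",
              fits_frame_offset ((int64_t) 1 << 30));
      printf ("slot for pc 0x400abc (2^14 buckets): %" PRIu64 "\n",
              hash_slot (0x400abc, 14));
      return 0;
    }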