mirror of
https://github.com/tobast/libunwind-eh_elf.git
synced 2024-11-30 02:27:38 +01:00
x86_64: Add stack alignment prologue tdep_trace fastpath
GCC versions 4.9~current will often generate stack alignment prologues like: lea 0x8(%rsp),%r10; and $0xfffffffffffffff0,%rsp; ...; push %rbp; mov %rsp,%rbp; push %r10 -- resulting in the dwarf expressions: DW_CFA_def_cfa_expression (DW_OP_breg6: -8; DW_OP_deref) and DW_CFA_expression: r6 (rbp) (DW_OP_breg6: 0). These prologues seem to be generated for SSE/AVX code, but sometimes in other cases as well. The tdep_trace fastpath currently falls back to the slow dwarf parsing path if it encounters any cfa_expressions. Unfortunately this is happening often enough in our codebase to cause perf issues. We could also fix the fallback path (make the rs cache bigger, lock-free instead of locking, etc), but that seems like a separate issue, and it will never be as fast as the tracing code. Our binaries each have at least ~100 functions in them like this. This patch teaches tdep_trace about the two specific cfa_expressions, which really just result in a single extra memory dereference of the stack at a fixed offset from rbp.
This commit is contained in:
parent
e5fe9034ad
commit
f7fe1c9a7e
5 changed files with 96 additions and 3 deletions
|
@ -387,6 +387,7 @@ struct dwarf_callback_data
|
||||||
#define dwarf_put_unwind_info UNW_OBJ (dwarf_put_unwind_info)
|
#define dwarf_put_unwind_info UNW_OBJ (dwarf_put_unwind_info)
|
||||||
#define dwarf_put_unwind_info UNW_OBJ (dwarf_put_unwind_info)
|
#define dwarf_put_unwind_info UNW_OBJ (dwarf_put_unwind_info)
|
||||||
#define dwarf_eval_expr UNW_OBJ (dwarf_eval_expr)
|
#define dwarf_eval_expr UNW_OBJ (dwarf_eval_expr)
|
||||||
|
#define dwarf_stack_aligned UNW_OBJ (dwarf_stack_aligned)
|
||||||
#define dwarf_extract_proc_info_from_fde \
|
#define dwarf_extract_proc_info_from_fde \
|
||||||
UNW_OBJ (dwarf_extract_proc_info_from_fde)
|
UNW_OBJ (dwarf_extract_proc_info_from_fde)
|
||||||
#define dwarf_find_save_locs UNW_OBJ (dwarf_find_save_locs)
|
#define dwarf_find_save_locs UNW_OBJ (dwarf_find_save_locs)
|
||||||
|
@ -419,6 +420,10 @@ extern void dwarf_put_unwind_info (unw_addr_space_t as,
|
||||||
extern int dwarf_eval_expr (struct dwarf_cursor *c, unw_word_t *addr,
|
extern int dwarf_eval_expr (struct dwarf_cursor *c, unw_word_t *addr,
|
||||||
unw_word_t len, unw_word_t *valp,
|
unw_word_t len, unw_word_t *valp,
|
||||||
int *is_register);
|
int *is_register);
|
||||||
|
extern int
|
||||||
|
dwarf_stack_aligned(struct dwarf_cursor *c, unw_word_t cfa_addr,
|
||||||
|
unw_word_t rbp_addr, unw_word_t *offset);
|
||||||
|
|
||||||
extern int dwarf_extract_proc_info_from_fde (unw_addr_space_t as,
|
extern int dwarf_extract_proc_info_from_fde (unw_addr_space_t as,
|
||||||
unw_accessors_t *a,
|
unw_accessors_t *a,
|
||||||
unw_word_t *fde_addr,
|
unw_word_t *fde_addr,
|
||||||
|
|
|
@ -40,6 +40,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
||||||
|
|
||||||
typedef enum
|
typedef enum
|
||||||
{
|
{
|
||||||
|
UNW_X86_64_FRAME_ALIGNED = -3, /* frame stack pointer aligned */
|
||||||
UNW_X86_64_FRAME_STANDARD = -2, /* regular rbp, rsp +/- offset */
|
UNW_X86_64_FRAME_STANDARD = -2, /* regular rbp, rsp +/- offset */
|
||||||
UNW_X86_64_FRAME_SIGRETURN = -1, /* special sigreturn frame */
|
UNW_X86_64_FRAME_SIGRETURN = -1, /* special sigreturn frame */
|
||||||
UNW_X86_64_FRAME_OTHER = 0, /* not cacheable (special or unrecognised) */
|
UNW_X86_64_FRAME_OTHER = 0, /* not cacheable (special or unrecognised) */
|
||||||
|
@ -50,10 +51,10 @@ unw_tdep_frame_type_t;
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
uint64_t virtual_address;
|
uint64_t virtual_address;
|
||||||
int64_t frame_type : 2; /* unw_tdep_frame_type_t classification */
|
int64_t frame_type : 3; /* unw_tdep_frame_type_t classification */
|
||||||
int64_t last_frame : 1; /* non-zero if last frame in chain */
|
int64_t last_frame : 1; /* non-zero if last frame in chain */
|
||||||
int64_t cfa_reg_rsp : 1; /* cfa dwarf base register is rsp vs. rbp */
|
int64_t cfa_reg_rsp : 1; /* cfa dwarf base register is rsp vs. rbp */
|
||||||
int64_t cfa_reg_offset : 30; /* cfa is at this offset from base register value */
|
int64_t cfa_reg_offset : 29; /* cfa is at this offset from base register value */
|
||||||
int64_t rbp_cfa_offset : 15; /* rbp saved at this offset from cfa (-1 = not saved) */
|
int64_t rbp_cfa_offset : 15; /* rbp saved at this offset from cfa (-1 = not saved) */
|
||||||
int64_t rsp_cfa_offset : 15; /* rsp saved at this offset from cfa (-1 = not saved) */
|
int64_t rsp_cfa_offset : 15; /* rsp saved at this offset from cfa (-1 = not saved) */
|
||||||
}
|
}
|
||||||
|
|
|
@ -186,6 +186,54 @@ read_operand (unw_addr_space_t as, unw_accessors_t *a,
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HIDDEN int
|
||||||
|
dwarf_stack_aligned(struct dwarf_cursor *c, unw_word_t cfa_addr,
|
||||||
|
unw_word_t rbp_addr, unw_word_t *cfa_offset) {
|
||||||
|
unw_accessors_t *a;
|
||||||
|
int ret;
|
||||||
|
void *arg;
|
||||||
|
unw_word_t len;
|
||||||
|
uint8_t opcode;
|
||||||
|
unw_word_t operand1;
|
||||||
|
|
||||||
|
a = unw_get_accessors (c->as);
|
||||||
|
arg = c->as_arg;
|
||||||
|
|
||||||
|
ret = dwarf_read_uleb128(c->as, a, &rbp_addr, &len, arg);
|
||||||
|
if (len != 2 || ret < 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
ret = dwarf_readu8(c->as, a, &rbp_addr, &opcode, arg);
|
||||||
|
if (ret < 0 || opcode != DW_OP_breg6)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
ret = read_operand(c->as, a, &rbp_addr,
|
||||||
|
OPND1_TYPE(operands[opcode]), &operand1, arg);
|
||||||
|
|
||||||
|
if (ret < 0 || operand1 != 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
ret = dwarf_read_uleb128(c->as, a, &cfa_addr, &len, arg);
|
||||||
|
if (ret < 0 || len != 3)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
ret = dwarf_readu8(c->as, a, &cfa_addr, &opcode, arg);
|
||||||
|
if (ret < 0 || opcode != DW_OP_breg6)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
ret = read_operand(c->as, a, &cfa_addr,
|
||||||
|
OPND1_TYPE(operands[opcode]), &operand1, arg);
|
||||||
|
if (ret < 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
ret = dwarf_readu8(c->as, a, &cfa_addr, &opcode, arg);
|
||||||
|
if (ret < 0 || opcode != DW_OP_deref)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
*cfa_offset = operand1;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
HIDDEN int
|
HIDDEN int
|
||||||
dwarf_eval_expr (struct dwarf_cursor *c, unw_word_t *addr, unw_word_t len,
|
dwarf_eval_expr (struct dwarf_cursor *c, unw_word_t *addr, unw_word_t len,
|
||||||
unw_word_t *valp, int *is_register)
|
unw_word_t *valp, int *is_register)
|
||||||
|
|
|
@ -41,6 +41,23 @@ tdep_stash_frame (struct dwarf_cursor *d, struct dwarf_reg_state *rs)
|
||||||
rs->reg[RBP].where, rs->reg[RBP].val, DWARF_GET_LOC(d->loc[RBP]),
|
rs->reg[RBP].where, rs->reg[RBP].val, DWARF_GET_LOC(d->loc[RBP]),
|
||||||
rs->reg[RSP].where, rs->reg[RSP].val, DWARF_GET_LOC(d->loc[RSP]));
|
rs->reg[RSP].where, rs->reg[RSP].val, DWARF_GET_LOC(d->loc[RSP]));
|
||||||
|
|
||||||
|
if (rs->reg[DWARF_CFA_REG_COLUMN].where == DWARF_WHERE_EXPR &&
|
||||||
|
rs->reg[RBP].where == DWARF_WHERE_EXPR) {
|
||||||
|
/* Check for GCC generated alignment frame for rsp. A simple
|
||||||
|
* def_cfa_expr that loads a constant offset from rbp, where the
|
||||||
|
* address of the rip was pushed on the stack */
|
||||||
|
unw_word_t cfa_addr = rs->reg[DWARF_CFA_REG_COLUMN].val;
|
||||||
|
unw_word_t rbp_addr = rs->reg[RBP].val;
|
||||||
|
unw_word_t cfa_offset;
|
||||||
|
|
||||||
|
int ret = dwarf_stack_aligned(d, cfa_addr, rbp_addr, &cfa_offset);
|
||||||
|
if (ret) {
|
||||||
|
f->frame_type = UNW_X86_64_FRAME_ALIGNED;
|
||||||
|
f->cfa_reg_offset = cfa_offset;
|
||||||
|
f->cfa_reg_rsp = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* A standard frame is defined as:
|
/* A standard frame is defined as:
|
||||||
- CFA is register-relative offset off RBP or RSP;
|
- CFA is register-relative offset off RBP or RSP;
|
||||||
- Return address is saved at CFA-8;
|
- Return address is saved at CFA-8;
|
||||||
|
@ -50,7 +67,7 @@ tdep_stash_frame (struct dwarf_cursor *d, struct dwarf_reg_state *rs)
|
||||||
&& (rs->reg[DWARF_CFA_REG_COLUMN].where == DWARF_WHERE_REG)
|
&& (rs->reg[DWARF_CFA_REG_COLUMN].where == DWARF_WHERE_REG)
|
||||||
&& (rs->reg[DWARF_CFA_REG_COLUMN].val == RBP
|
&& (rs->reg[DWARF_CFA_REG_COLUMN].val == RBP
|
||||||
|| rs->reg[DWARF_CFA_REG_COLUMN].val == RSP)
|
|| rs->reg[DWARF_CFA_REG_COLUMN].val == RSP)
|
||||||
&& labs((long) rs->reg[DWARF_CFA_OFF_COLUMN].val) < (1 << 29)
|
&& labs((long) rs->reg[DWARF_CFA_OFF_COLUMN].val) < (1 << 28)
|
||||||
&& DWARF_GET_LOC(d->loc[d->ret_addr_column]) == d->cfa-8
|
&& DWARF_GET_LOC(d->loc[d->ret_addr_column]) == d->cfa-8
|
||||||
&& (rs->reg[RBP].where == DWARF_WHERE_UNDEF
|
&& (rs->reg[RBP].where == DWARF_WHERE_UNDEF
|
||||||
|| rs->reg[RBP].where == DWARF_WHERE_SAME
|
|| rs->reg[RBP].where == DWARF_WHERE_SAME
|
||||||
|
@ -92,6 +109,10 @@ tdep_stash_frame (struct dwarf_cursor *d, struct dwarf_reg_state *rs)
|
||||||
Debug (4, " sigreturn frame\n");
|
Debug (4, " sigreturn frame\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
else if (f->frame_type == UNW_X86_64_FRAME_ALIGNED) {
|
||||||
|
Debug (4, " aligned frame, offset %li\n", f->cfa_reg_offset);
|
||||||
|
}
|
||||||
|
|
||||||
/* PLT and guessed RBP-walked frames are handled in unw_step(). */
|
/* PLT and guessed RBP-walked frames are handled in unw_step(). */
|
||||||
else
|
else
|
||||||
Debug (4, " unusual frame\n");
|
Debug (4, " unusual frame\n");
|
||||||
|
|
|
@ -506,6 +506,24 @@ tdep_trace (unw_cursor_t *cursor, void **buffer, int *size)
|
||||||
d->use_prev_instr = 0;
|
d->use_prev_instr = 0;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case UNW_X86_64_FRAME_ALIGNED:
|
||||||
|
/* Address of RIP was pushed on the stack via a simple
|
||||||
|
* def_cfa_expr - result stack offset stored in cfa_reg_offset */
|
||||||
|
cfa = (f->cfa_reg_rsp ? rsp : rbp) + f->cfa_reg_offset;
|
||||||
|
ACCESS_MEM_FAST(ret, c->validate, d, cfa, cfa);
|
||||||
|
if (likely(ret >= 0))
|
||||||
|
ACCESS_MEM_FAST(ret, c->validate, d, cfa - 8, rip);
|
||||||
|
if (likely(ret >= 0))
|
||||||
|
ACCESS_MEM_FAST(ret, c->validate, d, rbp, rbp);
|
||||||
|
|
||||||
|
/* Don't bother reading RSP from DWARF, CFA becomes new RSP. */
|
||||||
|
rsp = cfa;
|
||||||
|
|
||||||
|
/* Next frame needs to back up for unwind info lookup. */
|
||||||
|
d->use_prev_instr = 1;
|
||||||
|
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
/* We cannot trace through this frame, give up and tell the
|
/* We cannot trace through this frame, give up and tell the
|
||||||
caller we had to stop. Data collected so far may still be
|
caller we had to stop. Data collected so far may still be
|
||||||
|
|
Loading…
Reference in a new issue