1
0
Fork 0
mirror of https://github.com/tobast/libunwind-eh_elf.git synced 2024-11-29 18:27:37 +01:00

x86_64: Add stack alignment prologue tdep_trace fastpath

GCC versions 4.9~current will often generate stack alignment prologues like:

lea 0x8(%rsp),%r10
and $0xfffffffffffffff0,%rsp
...
push %rbp
mov %rsp, %rbp
push %r10

resulting in dwarf expressions:
DW_CFA_def_cfa_expression (DW_OP_breg6: -8; DW_OP_deref)
DW_CFA_expression: r6 (rbp) (DW_OP_breg6: 0)

These prologues seem to be generated mostly for SSE/AVX code, but
sometimes for other code as well.

The tdep_trace fastpath currently falls back to the slow dwarf parsing path
if it encounters any cfa_expressions. Unfortunately this is happening
often enough in our codebase to cause perf issues.  We could also fix the
fallback path (make the rs cache bigger, lock-free instead of locking, etc),
but that seems like a separate issue, and it will never be as fast as the tracing
code.  Our binaries each have at least ~100 functions in them like this.

This patch teaches the tdep_trace about the two specific cfa_expressions,
which really just result in a single extra memory dereference of the stack
at a fixed offset from rbp.
This commit is contained in:
Dave Watson 2016-11-28 15:50:58 -08:00
parent e5fe9034ad
commit f7fe1c9a7e
5 changed files with 96 additions and 3 deletions

View file

@ -387,6 +387,7 @@ struct dwarf_callback_data
#define dwarf_put_unwind_info UNW_OBJ (dwarf_put_unwind_info) #define dwarf_put_unwind_info UNW_OBJ (dwarf_put_unwind_info)
#define dwarf_put_unwind_info UNW_OBJ (dwarf_put_unwind_info) #define dwarf_put_unwind_info UNW_OBJ (dwarf_put_unwind_info)
#define dwarf_eval_expr UNW_OBJ (dwarf_eval_expr) #define dwarf_eval_expr UNW_OBJ (dwarf_eval_expr)
#define dwarf_stack_aligned UNW_OBJ (dwarf_stack_aligned)
#define dwarf_extract_proc_info_from_fde \ #define dwarf_extract_proc_info_from_fde \
UNW_OBJ (dwarf_extract_proc_info_from_fde) UNW_OBJ (dwarf_extract_proc_info_from_fde)
#define dwarf_find_save_locs UNW_OBJ (dwarf_find_save_locs) #define dwarf_find_save_locs UNW_OBJ (dwarf_find_save_locs)
@ -419,6 +420,10 @@ extern void dwarf_put_unwind_info (unw_addr_space_t as,
extern int dwarf_eval_expr (struct dwarf_cursor *c, unw_word_t *addr, extern int dwarf_eval_expr (struct dwarf_cursor *c, unw_word_t *addr,
unw_word_t len, unw_word_t *valp, unw_word_t len, unw_word_t *valp,
int *is_register); int *is_register);
/* Return 1 and store the rbp-relative offset in *CFA_OFFSET when the
   DWARF expressions found at CFA_ADDR and RBP_ADDR are exactly
   (DW_OP_breg6: <offset>; DW_OP_deref) and (DW_OP_breg6: 0)
   respectively; return 0 on any mismatch or read error.  */
extern int
dwarf_stack_aligned(struct dwarf_cursor *c, unw_word_t cfa_addr,
unw_word_t rbp_addr, unw_word_t *cfa_offset);
extern int dwarf_extract_proc_info_from_fde (unw_addr_space_t as, extern int dwarf_extract_proc_info_from_fde (unw_addr_space_t as,
unw_accessors_t *a, unw_accessors_t *a,
unw_word_t *fde_addr, unw_word_t *fde_addr,

View file

@ -40,6 +40,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
typedef enum typedef enum
{ {
UNW_X86_64_FRAME_ALIGNED = -3, /* frame stack pointer aligned */
UNW_X86_64_FRAME_STANDARD = -2, /* regular rbp, rsp +/- offset */ UNW_X86_64_FRAME_STANDARD = -2, /* regular rbp, rsp +/- offset */
UNW_X86_64_FRAME_SIGRETURN = -1, /* special sigreturn frame */ UNW_X86_64_FRAME_SIGRETURN = -1, /* special sigreturn frame */
UNW_X86_64_FRAME_OTHER = 0, /* not cacheable (special or unrecognised) */ UNW_X86_64_FRAME_OTHER = 0, /* not cacheable (special or unrecognised) */
@ -50,10 +51,10 @@ unw_tdep_frame_type_t;
typedef struct typedef struct
{ {
uint64_t virtual_address; uint64_t virtual_address;
int64_t frame_type : 2; /* unw_tdep_frame_type_t classification */ int64_t frame_type : 3; /* unw_tdep_frame_type_t classification */
int64_t last_frame : 1; /* non-zero if last frame in chain */ int64_t last_frame : 1; /* non-zero if last frame in chain */
int64_t cfa_reg_rsp : 1; /* cfa dwarf base register is rsp vs. rbp */ int64_t cfa_reg_rsp : 1; /* cfa dwarf base register is rsp vs. rbp */
int64_t cfa_reg_offset : 30; /* cfa is at this offset from base register value */ int64_t cfa_reg_offset : 29; /* cfa is at this offset from base register value */
int64_t rbp_cfa_offset : 15; /* rbp saved at this offset from cfa (-1 = not saved) */ int64_t rbp_cfa_offset : 15; /* rbp saved at this offset from cfa (-1 = not saved) */
int64_t rsp_cfa_offset : 15; /* rsp saved at this offset from cfa (-1 = not saved) */ int64_t rsp_cfa_offset : 15; /* rsp saved at this offset from cfa (-1 = not saved) */
} }

View file

@ -186,6 +186,54 @@ read_operand (unw_addr_space_t as, unw_accessors_t *a,
return ret; return ret;
} }
/* Recognise the pair of DWARF expressions GCC emits for a
   stack-realigning prologue:

     DW_CFA_def_cfa_expression (DW_OP_breg6: <off>; DW_OP_deref)
     DW_CFA_expression: r6 (rbp) (DW_OP_breg6: 0)

   CFA_ADDR and RBP_ADDR are the addresses of the two expression
   blocks; each block is read as a ULEB128 length followed by the
   opcodes.  Returns 1 and stores <off> in *CFA_OFFSET when both blocks
   match exactly.  Returns 0 on any mismatch or read error, in which
   case *CFA_OFFSET is left untouched.  */
HIDDEN int
dwarf_stack_aligned(struct dwarf_cursor *c, unw_word_t cfa_addr,
                    unw_word_t rbp_addr, unw_word_t *cfa_offset)
{
  unw_accessors_t *a = unw_get_accessors (c->as);
  void *arg = c->as_arg;
  unw_word_t len = 0;
  unw_word_t operand1 = 0;
  uint8_t opcode = 0;
  int ret;

  /* rbp rule: must be exactly two bytes, DW_OP_breg6 with offset 0.
     Test RET before LEN: on a failed read LEN may not have been
     written by the callee.  */
  ret = dwarf_read_uleb128 (c->as, a, &rbp_addr, &len, arg);
  if (ret < 0 || len != 2)
    return 0;

  ret = dwarf_readu8 (c->as, a, &rbp_addr, &opcode, arg);
  if (ret < 0 || opcode != DW_OP_breg6)
    return 0;

  ret = read_operand (c->as, a, &rbp_addr,
                      OPND1_TYPE (operands[opcode]), &operand1, arg);
  if (ret < 0 || operand1 != 0)
    return 0;

  /* CFA rule: must be exactly three bytes, DW_OP_breg6 <off> followed
     by DW_OP_deref.  */
  ret = dwarf_read_uleb128 (c->as, a, &cfa_addr, &len, arg);
  if (ret < 0 || len != 3)
    return 0;

  ret = dwarf_readu8 (c->as, a, &cfa_addr, &opcode, arg);
  if (ret < 0 || opcode != DW_OP_breg6)
    return 0;

  /* OPERAND1 now receives the rbp-relative offset; it is only
     published once the trailing DW_OP_deref has been confirmed.  */
  ret = read_operand (c->as, a, &cfa_addr,
                      OPND1_TYPE (operands[opcode]), &operand1, arg);
  if (ret < 0)
    return 0;

  ret = dwarf_readu8 (c->as, a, &cfa_addr, &opcode, arg);
  if (ret < 0 || opcode != DW_OP_deref)
    return 0;

  *cfa_offset = operand1;
  return 1;
}
HIDDEN int HIDDEN int
dwarf_eval_expr (struct dwarf_cursor *c, unw_word_t *addr, unw_word_t len, dwarf_eval_expr (struct dwarf_cursor *c, unw_word_t *addr, unw_word_t len,
unw_word_t *valp, int *is_register) unw_word_t *valp, int *is_register)

View file

@ -41,6 +41,23 @@ tdep_stash_frame (struct dwarf_cursor *d, struct dwarf_reg_state *rs)
rs->reg[RBP].where, rs->reg[RBP].val, DWARF_GET_LOC(d->loc[RBP]), rs->reg[RBP].where, rs->reg[RBP].val, DWARF_GET_LOC(d->loc[RBP]),
rs->reg[RSP].where, rs->reg[RSP].val, DWARF_GET_LOC(d->loc[RSP])); rs->reg[RSP].where, rs->reg[RSP].val, DWARF_GET_LOC(d->loc[RSP]));
if (rs->reg[DWARF_CFA_REG_COLUMN].where == DWARF_WHERE_EXPR &&
rs->reg[RBP].where == DWARF_WHERE_EXPR) {
/* Check for GCC generated alignment frame for rsp. A simple
* def_cfa_expr that loads a constant offset from rbp, where the
* address of the rip was pushed on the stack */
unw_word_t cfa_addr = rs->reg[DWARF_CFA_REG_COLUMN].val;
unw_word_t rbp_addr = rs->reg[RBP].val;
unw_word_t cfa_offset;
int ret = dwarf_stack_aligned(d, cfa_addr, rbp_addr, &cfa_offset);
if (ret) {
f->frame_type = UNW_X86_64_FRAME_ALIGNED;
f->cfa_reg_offset = cfa_offset;
f->cfa_reg_rsp = 0;
}
}
/* A standard frame is defined as: /* A standard frame is defined as:
- CFA is register-relative offset off RBP or RSP; - CFA is register-relative offset off RBP or RSP;
- Return address is saved at CFA-8; - Return address is saved at CFA-8;
@ -50,7 +67,7 @@ tdep_stash_frame (struct dwarf_cursor *d, struct dwarf_reg_state *rs)
&& (rs->reg[DWARF_CFA_REG_COLUMN].where == DWARF_WHERE_REG) && (rs->reg[DWARF_CFA_REG_COLUMN].where == DWARF_WHERE_REG)
&& (rs->reg[DWARF_CFA_REG_COLUMN].val == RBP && (rs->reg[DWARF_CFA_REG_COLUMN].val == RBP
|| rs->reg[DWARF_CFA_REG_COLUMN].val == RSP) || rs->reg[DWARF_CFA_REG_COLUMN].val == RSP)
&& labs((long) rs->reg[DWARF_CFA_OFF_COLUMN].val) < (1 << 29) && labs((long) rs->reg[DWARF_CFA_OFF_COLUMN].val) < (1 << 28)
&& DWARF_GET_LOC(d->loc[d->ret_addr_column]) == d->cfa-8 && DWARF_GET_LOC(d->loc[d->ret_addr_column]) == d->cfa-8
&& (rs->reg[RBP].where == DWARF_WHERE_UNDEF && (rs->reg[RBP].where == DWARF_WHERE_UNDEF
|| rs->reg[RBP].where == DWARF_WHERE_SAME || rs->reg[RBP].where == DWARF_WHERE_SAME
@ -92,6 +109,10 @@ tdep_stash_frame (struct dwarf_cursor *d, struct dwarf_reg_state *rs)
Debug (4, " sigreturn frame\n"); Debug (4, " sigreturn frame\n");
} }
else if (f->frame_type == UNW_X86_64_FRAME_ALIGNED) {
Debug (4, " aligned frame, offset %li\n", f->cfa_reg_offset);
}
/* PLT and guessed RBP-walked frames are handled in unw_step(). */ /* PLT and guessed RBP-walked frames are handled in unw_step(). */
else else
Debug (4, " unusual frame\n"); Debug (4, " unusual frame\n");

View file

@ -506,6 +506,24 @@ tdep_trace (unw_cursor_t *cursor, void **buffer, int *size)
d->use_prev_instr = 0; d->use_prev_instr = 0;
break; break;
case UNW_X86_64_FRAME_ALIGNED:
/* Address of RIP was pushed on the stack via a simple
* def_cfa_expr - result stack offset stored in cfa_reg_offset */
cfa = (f->cfa_reg_rsp ? rsp : rbp) + f->cfa_reg_offset;
ACCESS_MEM_FAST(ret, c->validate, d, cfa, cfa);
if (likely(ret >= 0))
ACCESS_MEM_FAST(ret, c->validate, d, cfa - 8, rip);
if (likely(ret >= 0))
ACCESS_MEM_FAST(ret, c->validate, d, rbp, rbp);
/* Don't bother reading RSP from DWARF, CFA becomes new RSP. */
rsp = cfa;
/* Next frame needs to back up for unwind info lookup. */
d->use_prev_instr = 1;
break;
default: default:
/* We cannot trace through this frame, give up and tell the /* We cannot trace through this frame, give up and tell the
caller we had to stop. Data collected so far may still be caller we had to stop. Data collected so far may still be