Compare commits

...

10 commits

Author SHA1 Message Date
Théophile Bastian 6df057260d Add uncommitted tests and curiosities 2019-04-05 16:39:21 +02:00
Théophile Bastian 11fa3fab40 Check_gen: patch for flat vs pyramid clang tables
clang generates tables like this

      LOC           CFA      rbx   rbp   r14   r15   ra
   00000000000028a0 rsp+8    u     u     u     u     c-8
   00000000000028a1 rsp+16   u     u     u     u     c-8
   00000000000028a3 rsp+24   u     u     u     u     c-8
   00000000000028a5 rsp+32   u     u     u     u     c-8
   00000000000028a6 rsp+40   u     u     u     u     c-8
   00000000000028aa rsp+64   c-40  c-16  c-32  c-24  c-8
2019-04-05 16:32:00 +02:00
Théophile Bastian 344ac84ef3 Ignore ghost subroutines for clang
A ghost subroutine is a subroutine having, in the IR representation, no
content. At clang -O0, some might be generated, eg.

<foo_func>:
foo_addr        nop

which translates to Empty in BIL.
2019-04-05 11:23:18 +02:00
Théophile Bastian 6c18d9f537 Use rbp only on the subs where we need it 2019-04-05 11:23:13 +02:00
Théophile Bastian 29ab916c55 Fix next_instr_graph out-of-subroutine pointers
The disasm-based next_instr_graph would introduce next instructions out
of the current subroutine for eg. calls, jmp to plts, etc.
2019-04-04 19:47:36 +02:00
Théophile Bastian 5f7dfb6f5f Use disasm to generate next_instr_graph 2019-04-04 13:49:39 +02:00
Théophile Bastian 3d336de196 Add flag to never go back to undefined rbp
Once rbp has been set in the DWARF, if this flag is set, nothing will
remove it from the table. This mimics gcc and allows us to easily check
our tables against theirs.
2019-04-04 11:52:47 +02:00
Théophile Bastian 4313ee91a7 Curiosities: mov %rsp, %rbp not switching to rbp tracking 2019-04-04 11:51:57 +02:00
Théophile Bastian c3050da113 Csmith checker: check rbp column 2019-04-03 18:20:03 +02:00
Théophile Bastian 5d6929f84c csmith_gen: add COMPILE_CMD option 2019-04-03 12:22:07 +02:00
13 changed files with 5754 additions and 125 deletions

View file

@ -1,8 +1,8 @@
open Std
let main outfile proj =
let main ?no_rbp_undef:(no_rbp_undef=false) outfile proj =
let pre_dwarf = proj
|> Simplest.of_proj
|> Simplest.of_proj no_rbp_undef
|> Simplest.clean_lost_track_subs in
Format.printf "%a" Frontend.pp_pre_dwarf_readelf pre_dwarf;
let pre_c_dwarf = PreCBinding.convert_pre_c pre_dwarf in

View file

@ -27,7 +27,8 @@ type subroutine_cfa_data = {
}
type block_local_state = {
rbp_vars: BStd.Var.Set.t
rbp_vars: BStd.Var.Set.t;
rbp_pop_set: BStd.Tid.Set.t
}
module StrMap = Map.Make(String)
@ -38,11 +39,26 @@ module TIdMap = Map.Make(BStd.Tid)
exception InvalidSub
exception UnexpectedRbpSet
(* Process-wide synthesis options. The single field is mutable: it is
   overwritten by [of_proj] before synthesis starts and read back by
   [find_rbp_pop_set]. *)
type synthesis_settings = {
mutable no_rbp_undef: bool
}
(* The global settings instance; [no_rbp_undef] is off until [of_proj] sets
   it. *)
let __settings = {
no_rbp_undef = false
}
let pp_cfa_pos ppx cfa_pos =
  (** Pretty-prints a CFA position on the formatter [ppx]: an offset from
      RSP, an offset from RBP, or a marker for a lost track. *)
  match cfa_pos with
  | RspOffset offset ->
    Format.fprintf ppx "RSP + (%s)" (Int64.to_string offset)
  | RbpOffset offset ->
    Format.fprintf ppx "RBP + (%s)" (Int64.to_string offset)
  | CfaLostTrack ->
    Format.fprintf ppx "??@."
let pp_rbp_pos ppx rbp_pos =
  (** Pretty-prints the position of the saved rbp on the formatter [ppx]:
      "u" when undefined, "c" followed by the signed offset otherwise. *)
  match rbp_pos with
  | RbpUndef ->
    Format.fprintf ppx "u"
  | RbpCfaOffset offset ->
    Format.fprintf ppx "c%+Ld" offset
let pp_reg_pos ppx reg_pos =
  (** Pretty-prints a (CFA position, rbp position) pair on [ppx], using
      [pp_cfa_pos] and [pp_rbp_pos] for the two components. *)
  let cfa_pos, rbp_pos = reg_pos in
  Format.fprintf ppx "(%a; %a)" pp_cfa_pos cfa_pos pp_rbp_pos rbp_pos
let pp_int64_hex ppx number =
let mask_short = Int64.(pred (shift_left one 16)) in
let pp_short number =
@ -82,6 +98,15 @@ let opt_addr_of_blk_elt = function
| `Jmp jmp -> opt_addr_of jmp
| `Phi phi -> opt_addr_of phi
let is_ghost_sub sub =
  (** Returns [true] when the subroutine is a ghost, that is, when every one
      of its blocks has no element at all. Returns [false] as soon as one
      block holds at least one element. *)
  BStd.Term.enum BStd.blk_t sub
  |> BStd.Seq.for_all
    ~f:(fun blk -> BStd.Seq.is_empty (BStd.Blk.elts blk))
let entrypoint_address blk =
(** Find the first instruction address in the current block.
Return None if no instruction has address. *)
@ -104,79 +129,195 @@ let map_option f = function
| None -> None
| Some x -> Some (f x)
let build_next_instr graph =
exception Block_not_in_subroutine
let build_next_instr sub_ranges (disasm: BStd.disasm): AddrSet.t AddrMap.t =
(** Build a map of memory_address -> AddrSet.t holding, for each address, the
set of instructions coming right after the instruction at given address.
There might be multiple such addresses, if the current instruction is at
a point of branching. *)
let addresses_in_block blk =
(** Set of addresses present in the block *)
BStd.Seq.fold (BStd.Blk.elts blk)
~init:AddrSet.empty
~f:(fun accu elt ->
let addr = opt_addr_of_blk_elt elt in
match addr with
| None -> accu
| Some x ->
(try
AddrSet.add (BStd.Word.to_int64_exn x) accu
with _ -> accu)
)
let rec build_of_instr_list cur_map = function
(** Maps an instruction to its following instruction in this block *)
| (cur_mem, cur_insn) :: ((next_mem, next_insn) as elt2) :: tl ->
(* Its only successor is next_insn *)
let new_map =
(try
let cur_addr = to_int64_addr @@ BStd.Memory.min_addr cur_mem
and next_addr = to_int64_addr @@ BStd.Memory.min_addr next_mem in
AddrMap.add cur_addr (AddrSet.singleton next_addr) cur_map
with _ -> cur_map)
in
build_of_instr_list new_map (elt2 :: tl)
| (cur_mem, cur_insn) :: [] ->
let last_addr =
(try Some (to_int64_addr @@ BStd.Memory.min_addr cur_mem)
with _ -> None) in
cur_map, last_addr
(* Ignore the last one: its successors are held in the graph *)
| [] ->
cur_map, None
in
let node_successors_addr (nd: CFG.node) : AddrSet.t =
let rec do_find_succ accu nd =
let fold_one accu c_node =
match entrypoint_address (CFG.Node.label c_node) with
| Some addr ->
(try
AddrSet.add (BStd.Word.to_int64_exn addr) accu
with _ -> accu)
| None -> do_find_succ accu c_node
in
let cfg = BStd.Disasm.cfg disasm in
let succ = CFG.Node.succs nd graph in
BStd.Seq.fold succ
~init:accu
~f:fold_one
in
do_find_succ AddrSet.empty nd
let rec block_addresses block =
(try BStd.Block.addr block
|> to_int64_addr
|> AddrSet.singleton
with _ ->
(* Probably an intermediary node, eg. JMP --> [intermed node] --> BLK *)
let outputs = BStd.Graphs.Cfg.Node.outputs block cfg
|> BStd.Seq.map ~f:BStd.Graphs.Cfg.Edge.dst in
BStd.Seq.fold outputs
~init:AddrSet.empty
~f:(fun accu block -> AddrSet.union (block_addresses block) accu)
)
in
let build_of_block accu_map node =
let blk = CFG.Node.label node in
let node_successors = node_successors_addr node in
let instr_addresses = AddrSet.elements @@ addresses_in_block blk in
let build_of_block cur_map block =
(try
(* First, check that this block belongs to a subroutine *)
let block_first_address = (
try
to_int64_addr @@ BStd.Block.addr block
with _ -> raise Block_not_in_subroutine) in
let sub_first_addr, sub_last_addr = (
try AddrMap.find_last
(fun start_addr -> start_addr <= block_first_address) sub_ranges
with Not_found ->
raise Block_not_in_subroutine
) in
let rec accumulate_mappings mappings addr_list = function
| None -> mappings
| Some (instr, instr_seq) as cur_instr ->
let instr_addr = opt_addr_of_blk_elt instr in
match (map_option to_int64_addr instr_addr), addr_list with
| None, _ ->
accumulate_mappings mappings addr_list @@ BStd.Seq.next instr_seq
| Some cur_addr, next_addr::t when cur_addr >= next_addr ->
accumulate_mappings mappings t cur_instr
| Some cur_addr, next_addr::_ ->
let n_mappings = AddrMap.add
cur_addr (AddrSet.singleton next_addr) mappings in
accumulate_mappings n_mappings addr_list @@ BStd.Seq.next instr_seq
| Some cur_addr, [] ->
let n_mappings = AddrMap.add
cur_addr node_successors mappings in
accumulate_mappings n_mappings addr_list @@ BStd.Seq.next instr_seq
in
accumulate_mappings
accu_map
instr_addresses
(BStd.Seq.next @@ BStd.Blk.elts blk)
(* Add the sequence of instructions inside the block itself *)
let cur_map, last_addr =
build_of_instr_list cur_map (BStd.Block.insns block) in
(* Then the set of possible destinations for the block terminator *)
(match last_addr with
| Some last_addr ->
let following_set = BStd.Graphs.Cfg.Node.outputs block cfg
|> BStd.Seq.fold
~init:AddrSet.empty
~f:(fun set edge -> AddrSet.union
(block_addresses
(BStd.Graphs.Cfg.Edge.dst edge))
set)
|> AddrSet.filter (fun addr ->
sub_first_addr <= addr
&& addr <= sub_last_addr)
(* ^ We must ensure the landing address belongs
to the current subroutine for our purpose *)
in
AddrMap.add last_addr following_set cur_map
| None -> cur_map
)
with Block_not_in_subroutine ->
cur_map
)
in
BStd.Seq.fold (CFG.nodes graph)
BStd.Seq.fold (BStd.Graphs.Cfg.nodes cfg)
~init:AddrMap.empty
~f:build_of_block
let find_rbp_pop_set cfg entry =
(** Returns a BStd.Tid.Set.t of the terms actually "popping" %rbp, that is,
the terms that should trigger a change to RbpUndef of the %rbp register.
The current heuristic is to consider the expressions
i) of the form `rbp = F(mem, rsp)` (allegedly, rbp = something from the
stack);
ii) that are the last of this kind in the subroutine's CFG (ie. such that
there is not another instruction matching (i) that is reachable through
the CFG from the current instruction).

[cfg] is the control-flow graph that gets explored, and [entry] is the node
the depth-first search starts from. When [__settings.no_rbp_undef] is set,
no search is done and the empty set is returned.
*)
(* [def_is_rbp_pop def] holds when the left-hand side of [def] is rbp and
   its right-hand side passes [is_pop_expr]. *)
let def_is_rbp_pop def =
(* [is_pop_expr expr] holds when the free variables of [expr] map to exactly
   two register options, the chosen non-[None] one is rsp, and a variable
   named "mem" is among the free variables.
   NOTE(review): presumably the second register option is the [None] that
   "mem" maps to -- confirm against [Regs.X86_64.map_varset]. *)
let is_pop_expr expr =
let free_vars = BStd.Exp.free_vars expr in
let free_x86_regs = Regs.X86_64.map_varset free_vars in
(match Regs.DwRegOptSet.cardinal free_x86_regs with
| 2 ->
let reg = free_x86_regs
|> Regs.DwRegOptSet.filter
(fun x -> match x with None -> false | Some _ -> true)
|> Regs.DwRegOptSet.choose in
let has_mem_var = BStd.Var.Set.exists
~f:(fun x -> BStd.Var.name x = "mem")
free_vars in
(match reg, has_mem_var with
| Some dw_var, true when dw_var = Regs.X86_64.rsp -> true
| _ -> false)
| _ -> false
)
in
(match Regs.X86_64.of_var (BStd.Def.lhs def),
is_pop_expr @@ BStd.Def.rhs def with
| Some reg, true when reg = Regs.X86_64.rbp -> true
| _ -> false
)
in
(* [block_find_rbp_pop block] returns the tid of the last definition of
   [block] satisfying [def_is_rbp_pop], or [None] if there is none. *)
let block_find_rbp_pop block =
let fold_elt = function
| `Def(def) when (def_is_rbp_pop def) -> Some (BStd.Term.tid def)
| _ -> None
in
let elts_seq = BStd.Blk.elts block in
(* Each later match overrides the accumulator, so the last one wins *)
let last_pop = BStd.Seq.fold elts_seq
~init:None
~f:(fun accu elt ->
(match fold_elt elt with
| None -> accu
| Some tid -> Some tid))
in
last_pop
in
(* [block_dfs node visited] returns a triple
   (pop tids found from [node], whether a pop was reported, updated visited
   set). *)
let rec block_dfs node visited =
(* DFS on the CFG to find rbp pops, and rule out those that are not final
*)
let block = CFG.Node.label node in
(match BStd.Blk.Set.mem visited block with
| true ->
(* Loop: we already visited this node *)
(* An already-visited node contributes no tid but reports [true], which
   prevents the callers from looking for a pop in their own block *)
BStd.Tid.Set.empty, true, visited
| false ->
let visited = BStd.Blk.Set.add visited block in
(* Explore every successor first, merging their pop sets and threading the
   visited set through the fold *)
let pop_set, has_pop, visited =
BStd.Seq.fold (CFG.Node.succs node cfg)
~f:(fun (pre_pop_set, pre_has_pop, visited) child ->
let cur_pop_set, cur_has_pop, visited =
block_dfs child visited in
(BStd.Tid.Set.union pre_pop_set cur_pop_set),
(pre_has_pop || cur_has_pop),
visited
)
~init:(BStd.Tid.Set.empty, false, visited)
in
let pop_set, has_pop = (match has_pop with
| false -> (* No rbp pop below, we seek rbp pops in this block *)
(match block_find_rbp_pop block with
| None -> pop_set, false
| Some tid -> BStd.Tid.Set.add pop_set tid, true
)
| true -> pop_set, has_pop) in
pop_set, has_pop, visited
)
in
(* With the no_rbp_undef setting, no term is ever considered an rbp pop *)
if __settings.no_rbp_undef then
BStd.Tid.Set.empty
else (
let pop_set, _, _ =
block_dfs entry (BStd.Blk.Set.empty) in
pop_set
)
let interpret_var_expr c_var offset expr = BStd.Bil.(
let closed_form = BStd.Exp.substitute
(var c_var)
@ -212,13 +353,14 @@ let is_single_free_reg expr =
)
let process_def (local_state: block_local_state) def (cur_reg: reg_pos)
allow_rbp
: (reg_pos option * block_local_state) =
let lose_track = Some CfaLostTrack in
let cur_cfa, cur_rbp = cur_reg in
let out_cfa =
(match cur_cfa, Regs.X86_64.of_var (BStd.Def.lhs def) with
| RspOffset(cur_offset), Some reg when reg = Regs.X86_64.rsp ->
(match cur_cfa, Regs.X86_64.of_var (BStd.Def.lhs def), allow_rbp with
| RspOffset(cur_offset), Some reg, _ when reg = Regs.X86_64.rsp ->
let exp = BStd.Def.rhs def in
(match is_single_free_reg exp with
| Some (bil_var, dw_var) when dw_var = Regs.X86_64.rsp ->
@ -230,7 +372,7 @@ let process_def (local_state: block_local_state) def (cur_reg: reg_pos)
)
| _ -> lose_track
)
| RspOffset(cur_offset), Some reg when reg = Regs.X86_64.rbp ->
| RspOffset(cur_offset), Some reg, true when reg = Regs.X86_64.rbp ->
(* We have CFA=rsp+k and a line %rbp <- [expr].
Might be a %rbp <- %rsp *)
let exp = BStd.Def.rhs def in
@ -251,7 +393,7 @@ let process_def (local_state: block_local_state) def (cur_reg: reg_pos)
)
| _ -> None
)
| RbpOffset(cur_offset), Some reg when reg = Regs.X86_64.rbp ->
| RbpOffset(cur_offset), Some reg, true when reg = Regs.X86_64.rbp ->
(* Assume we are overwriting %rbp with something — we must revert to
some rsp-based indexing *)
(* FIXME don't assume the rsp offset will always be 8, find a smart way
@ -260,6 +402,7 @@ let process_def (local_state: block_local_state) def (cur_reg: reg_pos)
value is read from the stack.
*)
Some (RspOffset(Int64.of_int 16))
| RbpOffset _, _, false -> assert false
| _ -> None
) in
@ -288,25 +431,6 @@ let process_def (local_state: block_local_state) def (cur_reg: reg_pos)
(has_rbp_var || has_intermed_rbp_var))
in
let is_pop_expr expr =
let free_vars = BStd.Exp.free_vars expr in
let free_x86_regs = Regs.X86_64.map_varset free_vars in
(match Regs.DwRegOptSet.cardinal free_x86_regs with
| 2 ->
let reg = free_x86_regs
|> Regs.DwRegOptSet.filter
(fun x -> match x with None -> false | Some _ -> true)
|> Regs.DwRegOptSet.choose in
let has_mem_var = BStd.Var.Set.exists
~f:(fun x -> BStd.Var.name x = "mem")
free_vars in
(match reg, has_mem_var with
| Some dw_var, true when dw_var = Regs.X86_64.rsp -> true
| _ -> false)
| _ -> false
)
in
let is_rbp_expr expr =
let free_vars = BStd.Exp.free_vars expr in
let free_x86_regs = Regs.X86_64.map_varset free_vars in
@ -369,13 +493,14 @@ let process_def (local_state: block_local_state) def (cur_reg: reg_pos)
new_rbp, cur_state
| RbpCfaOffset offs ->
(* We go back to RbpUndef when encountering something like a `pop rbp`,
that is, RBP <- f(RSP, mem) *)
(match Regs.X86_64.of_var (BStd.Def.lhs def),
is_pop_expr @@ BStd.Def.rhs def with
| Some reg, true when reg = Regs.X86_64.rbp ->
(* We go back to RbpUndef if the current def is in the rbp_pop_set --
see `find_rbp_pop_set` *)
(match BStd.Tid.Set.mem (local_state.rbp_pop_set) @@ BStd.Term.tid def
with
| true ->
Some RbpUndef, local_state
| _ -> None, local_state
| false -> None, local_state
)
)
in
@ -404,7 +529,8 @@ let process_jmp jmp (cur_reg: reg_pos)
| _ -> None
let process_blk
next_instr_graph (block_init: reg_pos) blk : (reg_changes_fde * reg_pos) =
next_instr_graph rbp_pop_set allow_rbp (block_init: reg_pos) blk
: (reg_changes_fde * reg_pos) =
(** Extracts the registers (CFA+RBP) changes of a block. *)
let apply_offset cur_addr_opt ((accu:reg_changes_fde), cur_reg, local_state)
@ -424,7 +550,8 @@ let process_blk
let fold_elt (accu, cur_reg, cur_local_state) elt = match elt with
| `Def(def) ->
let new_offset, new_state = process_def cur_local_state def cur_reg in
let new_offset, new_state =
process_def cur_local_state def cur_reg allow_rbp in
apply_offset
(opt_addr_of def) (accu, cur_reg, new_state) new_offset
| `Jmp(jmp) ->
@ -442,7 +569,8 @@ let process_blk
) in
let empty_local_state = {
rbp_vars = BStd.Var.Set.empty
rbp_vars = BStd.Var.Set.empty;
rbp_pop_set = rbp_pop_set
} in
let elts_seq = BStd.Blk.elts blk in
let out_reg, end_reg, _ = BStd.Seq.fold elts_seq
@ -525,18 +653,21 @@ let cleanup_fde (fde_changes: reg_changes_fde) : reg_changes_fde =
match AddrMap.fold fold_one fde_changes (AddrMap.empty, None, false) with
| out, _, _ -> out
let process_sub sub : subroutine_cfa_data =
let process_sub sub next_instr_graph : subroutine_cfa_data =
(** Extracts the `cfa_changes_fde` of a subroutine *)
let cfg = BStd.Sub.to_cfg sub in
let next_instr_graph = build_next_instr cfg in
let first_addr = int64_addr_of sub in
let last_addr = find_last_addr sub in
let initial_cfa_rsp_offset = Int64.of_int 8 in
let entry_blk = get_entry_blk cfg in
let rbp_pop_set = find_rbp_pop_set cfg entry_blk in
let rec dfs_process
allow_rbp
(sub_changes: (reg_changes_fde * reg_pos) TIdMap.t)
node
(entry_offset: reg_pos) =
@ -549,23 +680,56 @@ let process_sub sub : subroutine_cfa_data =
| None ->
(* Not yet visited: compute the changes *)
let cur_blk_changes, end_reg =
process_blk next_instr_graph entry_offset cur_blk in
process_blk next_instr_graph rbp_pop_set
allow_rbp entry_offset cur_blk in
let n_sub_changes =
TIdMap.add tid (cur_blk_changes, entry_offset) sub_changes in
BStd.Seq.fold (CFG.Node.succs node cfg)
~f:(fun accu child -> dfs_process accu child end_reg)
~f:(fun accu child -> dfs_process allow_rbp accu child end_reg)
~init:n_sub_changes
| Some (_, former_entry_offset) ->
(* Already visited: check that entry values are matching *)
if entry_offset <> former_entry_offset then
if entry_offset <> former_entry_offset then (
if allow_rbp then
Format.eprintf "Found inconsistency (0x%Lx): %a -- %a@."
(int64_addr_of cur_blk)
pp_reg_pos entry_offset pp_reg_pos former_entry_offset ;
raise (Inconsistent tid)
)
else
sub_changes
in
let entry_blk = get_entry_blk cfg in
let with_rbp_if_needed initial_offset =
(* Tries first without allowing CFA=rbp+k, then allowing it if the first
result was either inconsistent or lost track *)
let not_losing_track synth_result =
let lost_track = TIdMap.exists
(fun _ (_, (cfa_pos, _)) -> match cfa_pos with
| CfaLostTrack -> true
| _ -> false) synth_result
in
(match lost_track with
| true -> None
| false -> Some synth_result)
in
let without_rbp =
(try
dfs_process false TIdMap.empty entry_blk initial_offset
|> not_losing_track
with Inconsistent _ -> None
)
in
(match without_rbp with
| Some correct_res -> correct_res
| None ->
dfs_process true TIdMap.empty entry_blk initial_offset)
in
let initial_offset = (RspOffset initial_cfa_rsp_offset, RbpUndef) in
let changes_map = dfs_process TIdMap.empty entry_blk initial_offset in
(* Try first without rbp, then with rbp upon failure *)
let changes_map = with_rbp_if_needed initial_offset in
let merged_changes = TIdMap.fold
(fun _ (cfa_changes, _) accu -> AddrMap.union (fun _ v1 v2 ->
@ -587,12 +751,16 @@ let process_sub sub : subroutine_cfa_data =
output
let of_prog prog : subroutine_cfa_map =
let of_prog prog next_instr_graph : subroutine_cfa_map =
(** Extracts the `cfa_changes` of a program *)
let fold_step accu sub =
(try
let subroutine_data = process_sub sub in
StrMap.add (BStd.Sub.name sub) subroutine_data accu
(match is_ghost_sub sub with
| true -> accu
| false ->
let subroutine_data = process_sub sub next_instr_graph in
StrMap.add (BStd.Sub.name sub) subroutine_data accu
)
with
| InvalidSub -> accu
| Inconsistent tid ->
@ -606,10 +774,34 @@ let of_prog prog : subroutine_cfa_map =
~init:StrMap.empty
~f:fold_step
let of_proj proj : subroutine_cfa_map =
let build_sub_ranges prog: (memory_address) AddrMap.t =
  (** Builds a map binding the first address of every non-ghost subroutine
      of [prog] to the last address of that subroutine. The map works as a
      list of address ranges in which the range holding a given address can
      be looked up quickly (cf Map.S.find_last). Ghost subroutines, as
      detected by [is_ghost_sub], get no entry. *)
  BStd.Term.enum BStd.sub_t prog
  |> BStd.Seq.fold
    ~init:AddrMap.empty
    ~f:(fun ranges sub ->
        if is_ghost_sub sub then
          ranges
        else begin
          let range_start = int64_addr_of sub in
          let range_end = find_last_addr sub in
          AddrMap.add range_start range_end ranges
        end)
let of_proj no_rbp_undef proj : subroutine_cfa_map =
(** Extracts the `cfa_changes` of a project *)
__settings.no_rbp_undef <- no_rbp_undef ;
let prog = BStd.Project.program proj in
of_prog prog
let sub_ranges = build_sub_ranges prog in
let next_instr_graph =
build_next_instr sub_ranges (BStd.Project.disasm proj) in
of_prog prog next_instr_graph
let clean_lost_track_subs pre_dwarf : subroutine_cfa_map =
(** Removes the subroutines on which we lost track from [pre_dwarf] *)

1
csmith/.gitignore vendored Normal file
View file

@ -0,0 +1 @@
0[0-9]/

View file

@ -32,10 +32,105 @@ def parse_fde_head(line):
return pc_beg, pc_end
def parse_fde_row(line, ra_col):
def detect_clang_flat_to_pyramid(rows):
    """ Artificially repair clang flat callee-saved saving to a gcc pyramid-like shape.

    Eg., clang will generate

        LOC              CFA      rbx  rbp  ra
        0000000000007180 rsp+8    u    u    c-8
        0000000000007181 rsp+16   u    u    c-8
        0000000000007182 rsp+24   u    u    c-8
        0000000000007189 rsp+7632 c-24 c-16 c-8

    while we would wish to have

        LOC              CFA      rbx  rbp  ra
        0000000000007180 rsp+8    u    u    c-8
        0000000000007181 rsp+16   u    c-16 c-8
        0000000000007182 rsp+24   c-24 c-16 c-8
        0000000000007189 rsp+7632 c-24 c-16 c-8

    The repair strategy is as follows:

    - ignore the implicit first row
    - find the first k lines such that only CFA changes from line to line, with a
      delta of +8, with an address delta of at most 2 (a push is at most 2 bytes
      long)
    - for every callee-saved R that concerns us and that is defined at line k+1
      with offset c-x, while rsp+x is the CFA of line k' <= k, redefine R as c-x
      in lines [k'; k+1[

    Args:
        rows: list of row dicts, each with an integer "LOC", a string "CFA", and
            one string entry per register column ("ra", "rbp", ...).

    Returns:
        The same `rows` list. When a flat prologue is detected, the row dicts are
        modified in place; otherwise they are left untouched.
    """

    # Columns that are not callee-saved registers
    non_reg_cols = ("LOC", "CFA", "ra")

    def try_starting_at(start_row):
        """ Attempt the repair, taking `start_row` as the first flat row.

        Returns `(rows, modified)`; nothing is written unless the whole
        detection succeeds. """
        if len(rows) < start_row + 1:  # Ensure we have at least the start row
            return rows, False

        flatness_row_id = start_row
        # The start row itself must be rsp-based, else its offset cannot be parsed
        if rows[start_row]["CFA"][:4] != "rsp+":
            return rows, False
        first_cfa = int(rows[start_row]["CFA"][4:])
        prev_cfa = first_cfa
        prev_loc = rows[start_row]["LOC"]
        for row in rows[start_row + 1 :]:
            # The flat zone ends as soon as any callee-saved register is defined
            if any(reg not in non_reg_cols and row[reg] != "u" for reg in row):
                break
            cfa = row["CFA"]
            if cfa[:4] != "rsp+":
                break
            cfa_offset = int(cfa[4:])
            if cfa_offset != prev_cfa + 8:
                break
            prev_cfa += 8
            loc = row["LOC"]
            if loc > prev_loc + 2:
                break
            prev_loc = loc
            flatness_row_id += 1
        # Move to the row right after the flat zone: the one defining the registers
        flatness_row_id += 1
        if flatness_row_id - start_row <= 1 or flatness_row_id >= len(rows):
            return rows, False  # nothing to change

        flatness_row = rows[flatness_row_id]
        reg_changes = {}
        for reg in flatness_row:
            if reg in non_reg_cols:
                continue
            rule = flatness_row[reg]
            if rule[:2] != "c-":
                return rows, False  # Not a flat_to_pyramid after all
            rule_offset = int(rule[2:])
            rule_offset_rectified = rule_offset - first_cfa
            if rule_offset_rectified % 8 != 0:
                return rows, False
            # Row `start_row + j` has CFA `first_cfa + 8*j`
            row_change_id = rule_offset_rectified // 8 + start_row
            if row_change_id < start_row:
                # Would rewrite rows before the flat zone (or wrap around with a
                # negative slice index): not a shape we know how to repair
                return rows, False
            reg_changes[reg] = (row_change_id, rule)

        for reg in reg_changes:
            change_from, rule = reg_changes[reg]
            for row in rows[change_from:flatness_row_id]:
                row[reg] = rule

        return rows, True

    for start_row in [1, 2]:
        mod_rows, modified = try_starting_at(start_row)
        if modified:
            return mod_rows
    return rows
def parse_fde_row(line, reg_cols):
vals = list(map(lambda x: x.strip(), line.split()))
assert len(vals) > ra_col # ra is the rightmost useful column
out = {"LOC": int(vals[0], 16), "CFA": vals[1], "ra": vals[ra_col]}
assert len(vals) > reg_cols["ra"] # ra is the rightmost useful column
out = {"LOC": int(vals[0], 16), "CFA": vals[1]}
for reg in reg_cols:
col_id = reg_cols[reg]
out[reg] = vals[col_id]
if "rbp" not in out:
out["rbp"] = "u"
return out
@ -47,7 +142,14 @@ def clean_rows(rows):
out_rows = [rows[0]]
for row in rows[1:]:
if not row == out_rows[-1]:
out_rows.append(row)
filtered_row = row
filter_out = []
for reg in filtered_row:
if reg not in ["LOC", "CFA", "rbp", "ra"]:
filter_out.append(reg)
for reg in filter_out:
filtered_row.pop(reg)
out_rows.append(filtered_row)
return out_rows
@ -58,14 +160,28 @@ def parse_fde(lines):
except NotFDE:
return
rows = [{"LOC": 0, "CFA": "rsp+8", "ra": "c-8"}] # Implicit CIE row
rows = [{"LOC": 0, "CFA": "rsp+8", "rbp": "u", "ra": "c-8"}] # Implicit CIE row
if len(lines) >= 2: # Has content
head_row = list(map(lambda x: x.strip(), lines[1].split()))
ra_col = head_row.index("ra")
reg_cols = {}
for pos, reg in enumerate(head_row):
if reg not in ["LOC", "CFA"]:
reg_cols[reg] = pos
for line in lines[2:]:
rows.append(parse_fde_row(line, ra_col))
rows.append(parse_fde_row(line, reg_cols))
# if pc_beg == 0x1160:
# print("===== FDE: {}..{} ====".format(hex(pc_beg), hex(pc_end)))
# print("BEFORE:")
# for row in rows:
# print(row)
rows = detect_clang_flat_to_pyramid(rows)
# if pc_beg == 0x1160:
# print("AFTER:")
# for row in rows:
# print(row)
return {"beg": pc_beg, "end": pc_end, "rows": clean_rows(rows)}
@ -151,7 +267,7 @@ def dump_light_fdes(fdes):
def match_fde(orig, synth):
def vals_of(row):
return {"CFA": row["CFA"], "ra": row["ra"]}
return {"CFA": row["CFA"], "ra": row["ra"], "rbp": row["rbp"]}
def loc_of(rch):
return rch[1]["LOC"]
@ -165,7 +281,7 @@ def match_fde(orig, synth):
rowchanges.append((typ, row))
rowchanges.sort(key=loc_of)
matching = True
mismatch_count = 0
for rowid, rowch in enumerate(rowchanges):
typ, row = rowch[0], rowch[1]
cur_val[typ] = vals_of(row)
@ -180,12 +296,17 @@ def match_fde(orig, synth):
hex(row["LOC"]), cur_val[0], cur_val[1]
)
)
matching = False
mismatch_count += 1
return matching
return mismatch_count
def parse_sym_table(handle):
def readint(x):
if x.startswith("0x"):
return int(x[2:], 16)
return int(x)
out_map = {}
for line in handle:
line = line.strip()
@ -194,7 +315,7 @@ def parse_sym_table(handle):
spl = list(map(lambda x: x.strip(), line.split()))
loc = int(spl[1], 16)
size = int(spl[2])
size = readint(spl[2])
name = spl[7]
out_map[name] = (loc, size)
return out_map
@ -227,15 +348,30 @@ def main():
mismatches = 0
for (orig, synth) in matched:
if not match_fde(orig, synth):
mismatches += 1
mismatches += match_fde(orig, synth)
reports = []
if mismatches > 0:
reports.append("{} mismatches".format(mismatches))
if unmatched_orig:
reports.append("{} unmatched (orig)".format(len(unmatched_orig)))
worth_reporting = False
for unmatched in unmatched_orig:
if len(unmatched["rows"]) > 1:
worth_reporting = True
break
if worth_reporting:
unmatched_addrs = [fde_pos(fde) for fde in unmatched_orig]
reports.append(
"{} unmatched (orig): {}".format(
len(unmatched_orig), ", ".join(unmatched_addrs)
)
)
if unmatched_synth:
reports.append("{} unmatched (synth)".format(len(unmatched_synth)))
unmatched_addrs = [fde_pos(fde) for fde in unmatched_synth]
reports.append(
"{} unmatched (synth): {}".format(
len(unmatched_synth), ", ".join(unmatched_addrs)
)
)
if reports:
print("{}: {}".format(test_name, "; ".join(reports)))

View file

@ -1,6 +1,12 @@
#!/bin/bash
USAGE="$0 output_dir nb_tests"
USAGE="$0 output_dir nb_tests
You may also set COMPILE_CMD to eg. 'gcc -O2' if you want to override the
default command."
if [ -z "$COMPILE_CMD" ] ; then
COMPILE_CMD='gcc -O2'
fi
if [ "$#" -lt 2 ] ; then
>&2 echo -e "Missing argument(s). Usage:\n$USAGE"
@ -28,11 +34,11 @@ for _num in $(seq 1 $NB_TESTS); do
csmith > "$path.c"
sed -i 's/^static \(.* func_\)/\1/g' "$path.c"
echo -ne "\r>>> $num.bin "
gcc -O2 -I/usr/include/csmith-2.3.0/ -w "$path.c" -o "$path.orig.bin"
$COMPILE_CMD -I/usr/include/csmith-2.3.0/ -w "$path.c" -o "$path.orig.bin"
objcopy --remove-section '.eh_frame' --remove-section '.eh_frame_hdr' \
"$path.orig.bin" "$path.bin"
echo -ne "\r>>> $num.eh.bin "
../synthesize_dwarf.sh "$path.bin" "$path.eh.bin"
BAP_ARGS='--dwarfsynth-no-rbp-undef' ../synthesize_dwarf.sh "$path.bin" "$path.eh.bin"
if [ "$check_gen_eh_frame" -gt 0 ] ; then
./check_generated_eh_frame.sh "$path"

1
curiosities/.gitignore vendored Normal file
View file

@ -0,0 +1 @@
*.bin

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -20,8 +20,18 @@ module Cmdline = struct
~default:"tmp.marshal"
)
(* Boolean command-line flag "no-rbp-undef", off by default. Its value is
   handed to [main] as [~no_rbp_undef] when the pass is registered. *)
let no_rbp_undef = Cnf.(
param (bool) "no-rbp-undef"
~doc:("Do not unset %rbp after it has been set once in a FDE. "
^"This mimics gcc eh_frame for ease of validation.")
~as_flag:true
~default:false
)
let () = Cnf.(
when_ready ((fun {get=(!!)} ->
Bap.Std.Project.register_pass' (main !!outfile)))
Bap.Std.Project.register_pass' (main
~no_rbp_undef:!!no_rbp_undef
!!outfile )))
)
end

View file

@ -35,7 +35,9 @@ function find_ml_dwarf_write {
}
function bap_synth {
bap "$INPUT_FILE" --no-byteweight -p dwarfsynth --dwarfsynth-output "$TMP_DIR/marshal" \
bap "$INPUT_FILE" \
--no-byteweight -p dwarfsynth \
--dwarfsynth-output "$TMP_DIR/marshal" $BAP_ARGS \
> /dev/null
return $?
}

1
test/.gitignore vendored Normal file
View file

@ -0,0 +1 @@
*.bin

19
test/inline_asm.c Normal file
View file

@ -0,0 +1,19 @@
#include <stdio.h>
#include <stdlib.h>
/* Runs a loop `upper_bound` times (taken from argv[1]) and prints the number
 * of iterations. Exits with status 1 if no argument is given.
 *
 * Each iteration moves %rsp by hand through inline assembly: first down by 8
 * with a store of 42 at the new top of the stack, then back up by 8.
 * NOTE(review): presumably a fixture for stack-pointer changes the compiler
 * is not told about -- confirm the intended use. */
int main(int argc, char** argv) {
if(argc < 2) {
fprintf(stderr, "Missing argument: loop upper bound.\n");
exit(1);
}
int upper_bound = atoi(argv[1]);
int count = 0;
for(int i=0; i < upper_bound; ++i) {
/* Lower %rsp by 8 and write 42 into the freed slot */
__asm__("sub $8, %rsp; movq $42, (%rsp)");
count++;
/* Restore %rsp to its value from before the first asm statement */
__asm__("add $8, %rsp");
}
printf("%d\n", count);
return 0;
}

28
test/without_rbp_col.c Normal file
View file

@ -0,0 +1,28 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
/* For every cz in [1, z), declares a variable-length array of cz ints and
 * writes 8 into its middle element. Nothing is returned.
 * NOTE(review): presumably the variable-length array is what matters here
 * (frame layout of the compiled function) -- confirm. */
void rbp_bump_2(int z) {
for(int cz=1; cz < z; cz++) {
/* Array size depends on the loop counter */
int x[cz];
x[cz / 2] = 8;
}
}
/* For every cy in [1, y), declares a variable-length array of cy ints, writes
 * 8 into its middle element, then calls rbp_bump_2 with that element. */
void rbp_bump_1(int y) {
for(int cy=1; cy < y; cy++) {
/* Array size depends on the loop counter */
int x[cy];
x[cy / 2] = 8;
/* The argument is the value stored just above, i.e. 8 */
rbp_bump_2(x[cy/2]);
}
}
/* Parses argv[1] as an integer and passes it to rbp_bump_1. Returns 1 with a
 * message on stderr if the argument is missing, 0 otherwise. */
int main(int argc, char** argv) {
if(argc < 2) {
fprintf(stderr, "Missing argument: n\n");
return 1;
}
int num = atoi(argv[1]);
rbp_bump_1(num);
return 0;
}