Compare commits

...

10 commits

Author SHA1 Message Date
Théophile Bastian 6df057260d Add uncommitted tests and curiosities 2019-04-05 16:39:21 +02:00
Théophile Bastian 11fa3fab40 Check_gen: patch for flat vs pyramid clang tables
clang generates tables like this

      LOC           CFA      rbx   rbp   r14   r15   ra
   00000000000028a0 rsp+8    u     u     u     u     c-8
   00000000000028a1 rsp+16   u     u     u     u     c-8
   00000000000028a3 rsp+24   u     u     u     u     c-8
   00000000000028a5 rsp+32   u     u     u     u     c-8
   00000000000028a6 rsp+40   u     u     u     u     c-8
   00000000000028aa rsp+64   c-40  c-16  c-32  c-24  c-8
2019-04-05 16:32:00 +02:00
Théophile Bastian 344ac84ef3 Ignore ghost subroutines for clang
A ghost subroutine is a subroutine having, in the IR representation, no
content. At clang -O0, some might be generated, eg.

<foo_func>:
foo_addr        nop

which translates to Empty in BIL.
2019-04-05 11:23:18 +02:00
Théophile Bastian 6c18d9f537 Use rbp only on the subs where we need it 2019-04-05 11:23:13 +02:00
Théophile Bastian 29ab916c55 Fix next_instr_graph out-of-subroutine pointers
The disasm-based next_instr_graph would introduce next instructions out
of the current subroutine for eg. calls, jmp to plts, etc.
2019-04-04 19:47:36 +02:00
Théophile Bastian 5f7dfb6f5f Use disasm to generate next_instr_graph 2019-04-04 13:49:39 +02:00
Théophile Bastian 3d336de196 Add flag to never go back to undefined rbp
Once rbp has been set in the DWARF, if this flag is set, nothing will
remove it from the table. This mimics gcc and allows us to easily check
our tables against theirs.
2019-04-04 11:52:47 +02:00
Théophile Bastian 4313ee91a7 Curiosities: mov %rsp, %rbp not switching to rbp tracking 2019-04-04 11:51:57 +02:00
Théophile Bastian c3050da113 Csmith checker: check rbp column 2019-04-03 18:20:03 +02:00
Théophile Bastian 5d6929f84c csmith_gen: add COMPILE_CMD option 2019-04-03 12:22:07 +02:00
13 changed files with 5754 additions and 125 deletions

View file

@ -1,8 +1,8 @@
open Std open Std
let main outfile proj = let main ?no_rbp_undef:(no_rbp_undef=false) outfile proj =
let pre_dwarf = proj let pre_dwarf = proj
|> Simplest.of_proj |> Simplest.of_proj no_rbp_undef
|> Simplest.clean_lost_track_subs in |> Simplest.clean_lost_track_subs in
Format.printf "%a" Frontend.pp_pre_dwarf_readelf pre_dwarf; Format.printf "%a" Frontend.pp_pre_dwarf_readelf pre_dwarf;
let pre_c_dwarf = PreCBinding.convert_pre_c pre_dwarf in let pre_c_dwarf = PreCBinding.convert_pre_c pre_dwarf in

View file

@ -27,7 +27,8 @@ type subroutine_cfa_data = {
} }
type block_local_state = { type block_local_state = {
rbp_vars: BStd.Var.Set.t rbp_vars: BStd.Var.Set.t;
rbp_pop_set: BStd.Tid.Set.t
} }
module StrMap = Map.Make(String) module StrMap = Map.Make(String)
@ -38,11 +39,26 @@ module TIdMap = Map.Make(BStd.Tid)
exception InvalidSub exception InvalidSub
exception UnexpectedRbpSet exception UnexpectedRbpSet
(* Settings of the synthesis, kept in a record with mutable fields so that
   they can be changed after the module is loaded. *)
type synthesis_settings = {
(* When true, `find_rbp_pop_set` yields an empty set, so no definition is
   reported as popping %rbp. *)
mutable no_rbp_undef: bool
}
(* Module-wide instance of the settings, initialised with `no_rbp_undef`
   disabled. `of_proj` overwrites the field with the value it receives. *)
let __settings = {
no_rbp_undef = false
}
let pp_cfa_pos ppx = function let pp_cfa_pos ppx = function
| RspOffset off -> Format.fprintf ppx "RSP + (%s)" (Int64.to_string off) | RspOffset off -> Format.fprintf ppx "RSP + (%s)" (Int64.to_string off)
| RbpOffset off -> Format.fprintf ppx "RBP + (%s)" (Int64.to_string off) | RbpOffset off -> Format.fprintf ppx "RBP + (%s)" (Int64.to_string off)
| CfaLostTrack -> Format.fprintf ppx "??@." | CfaLostTrack -> Format.fprintf ppx "??@."
let pp_rbp_pos ppx rbp_pos =
  (** Pretty-prints the position of %rbp on [ppx]: "u" when undefined,
      "c" followed by the explicitly signed offset when it is stored at an
      offset from the CFA. *)
  match rbp_pos with
  | RbpUndef -> Format.pp_print_string ppx "u"
  | RbpCfaOffset off -> Format.fprintf ppx "c%+Ld" off
let pp_reg_pos ppx reg_pos =
  (** Pretty-prints a (CFA position, %rbp position) pair on [ppx], as
      "(<cfa>; <rbp>)". *)
  let cfa_pos, rbp_pos = reg_pos in
  Format.fprintf ppx "(%a; %a)" pp_cfa_pos cfa_pos pp_rbp_pos rbp_pos
let pp_int64_hex ppx number = let pp_int64_hex ppx number =
let mask_short = Int64.(pred (shift_left one 16)) in let mask_short = Int64.(pred (shift_left one 16)) in
let pp_short number = let pp_short number =
@ -82,6 +98,15 @@ let opt_addr_of_blk_elt = function
| `Jmp jmp -> opt_addr_of jmp | `Jmp jmp -> opt_addr_of jmp
| `Phi phi -> opt_addr_of phi | `Phi phi -> opt_addr_of phi
let is_ghost_sub sub =
  (** Tells whether the subroutine is a "ghost", that is, whether it has no
      content: every one of its blocks holds an empty sequence of elements.
      A subroutine having no block at all is thus a ghost as well. *)
  BStd.Term.enum BStd.blk_t sub
  |> BStd.Seq.for_all ~f:(fun blk -> BStd.Seq.is_empty (BStd.Blk.elts blk))
let entrypoint_address blk = let entrypoint_address blk =
(** Find the first instruction address in the current block. (** Find the first instruction address in the current block.
Return None if no instruction has address. *) Return None if no instruction has address. *)
@ -104,79 +129,195 @@ let map_option f = function
| None -> None | None -> None
| Some x -> Some (f x) | Some x -> Some (f x)
let build_next_instr graph = exception Block_not_in_subroutine
let build_next_instr sub_ranges (disasm: BStd.disasm): AddrSet.t AddrMap.t =
(** Build a map of memory_address -> AddrSet.t holding, for each address, the (** Build a map of memory_address -> AddrSet.t holding, for each address, the
set of instructions coming right after the instruction at given address. set of instructions coming right after the instruction at given address.
There might be multiple such addresses, if the current instruction is at There might be multiple such addresses, if the current instruction is at
a point of branching. *) a point of branching. *)
let addresses_in_block blk = let rec build_of_instr_list cur_map = function
(** Set of addresses present in the block *) (** Maps an instruction to its following instruction in this block *)
BStd.Seq.fold (BStd.Blk.elts blk) | (cur_mem, cur_insn) :: ((next_mem, next_insn) as elt2) :: tl ->
~init:AddrSet.empty (* Its only successor is next_insn *)
~f:(fun accu elt -> let new_map =
let addr = opt_addr_of_blk_elt elt in (try
match addr with let cur_addr = to_int64_addr @@ BStd.Memory.min_addr cur_mem
| None -> accu and next_addr = to_int64_addr @@ BStd.Memory.min_addr next_mem in
| Some x -> AddrMap.add cur_addr (AddrSet.singleton next_addr) cur_map
(try with _ -> cur_map)
AddrSet.add (BStd.Word.to_int64_exn x) accu in
with _ -> accu) build_of_instr_list new_map (elt2 :: tl)
) | (cur_mem, cur_insn) :: [] ->
let last_addr =
(try Some (to_int64_addr @@ BStd.Memory.min_addr cur_mem)
with _ -> None) in
cur_map, last_addr
(* Ignore the last one: its successors are held in the graph *)
| [] ->
cur_map, None
in in
let node_successors_addr (nd: CFG.node) : AddrSet.t = let cfg = BStd.Disasm.cfg disasm in
let rec do_find_succ accu nd =
let fold_one accu c_node =
match entrypoint_address (CFG.Node.label c_node) with
| Some addr ->
(try
AddrSet.add (BStd.Word.to_int64_exn addr) accu
with _ -> accu)
| None -> do_find_succ accu c_node
in
let succ = CFG.Node.succs nd graph in let rec block_addresses block =
BStd.Seq.fold succ (try BStd.Block.addr block
~init:accu |> to_int64_addr
~f:fold_one |> AddrSet.singleton
in with _ ->
do_find_succ AddrSet.empty nd (* Probably an intermediary node, eg. JMP --> [inermed node] --> BLK *)
let outputs = BStd.Graphs.Cfg.Node.outputs block cfg
|> BStd.Seq.map ~f:BStd.Graphs.Cfg.Edge.dst in
BStd.Seq.fold outputs
~init:AddrSet.empty
~f:(fun accu block -> AddrSet.union (block_addresses block) accu)
)
in in
let build_of_block accu_map node = let build_of_block cur_map block =
let blk = CFG.Node.label node in (try
let node_successors = node_successors_addr node in (* First, check that this block belongs to a subroutine *)
let instr_addresses = AddrSet.elements @@ addresses_in_block blk in let block_first_address = (
try
to_int64_addr @@ BStd.Block.addr block
with _ -> raise Block_not_in_subroutine) in
let sub_first_addr, sub_last_addr = (
try AddrMap.find_last
(fun start_addr -> start_addr <= block_first_address) sub_ranges
with Not_found ->
raise Block_not_in_subroutine
) in
let rec accumulate_mappings mappings addr_list = function (* Add the sequence of instuctions inside the block itself *)
| None -> mappings let cur_map, last_addr =
| Some (instr, instr_seq) as cur_instr -> build_of_instr_list cur_map (BStd.Block.insns block) in
let instr_addr = opt_addr_of_blk_elt instr in
match (map_option to_int64_addr instr_addr), addr_list with (* Then the set of possible destinations for the block terminator *)
| None, _ -> (match last_addr with
accumulate_mappings mappings addr_list @@ BStd.Seq.next instr_seq | Some last_addr ->
| Some cur_addr, next_addr::t when cur_addr >= next_addr -> let following_set = BStd.Graphs.Cfg.Node.outputs block cfg
accumulate_mappings mappings t cur_instr |> BStd.Seq.fold
| Some cur_addr, next_addr::_ -> ~init:AddrSet.empty
let n_mappings = AddrMap.add ~f:(fun set edge -> AddrSet.union
cur_addr (AddrSet.singleton next_addr) mappings in (block_addresses
accumulate_mappings n_mappings addr_list @@ BStd.Seq.next instr_seq (BStd.Graphs.Cfg.Edge.dst edge))
| Some cur_addr, [] -> set)
let n_mappings = AddrMap.add |> AddrSet.filter (fun addr ->
cur_addr node_successors mappings in sub_first_addr <= addr
accumulate_mappings n_mappings addr_list @@ BStd.Seq.next instr_seq && addr <= sub_last_addr)
in (* ^ We must ensure the landing address belongs
accumulate_mappings to the current subroutine for our purpose *)
accu_map in
instr_addresses AddrMap.add last_addr following_set cur_map
(BStd.Seq.next @@ BStd.Blk.elts blk) | None -> cur_map
)
with Block_not_in_subroutine ->
cur_map
)
in in
BStd.Seq.fold (CFG.nodes graph) BStd.Seq.fold (BStd.Graphs.Cfg.nodes cfg)
~init:AddrMap.empty ~init:AddrMap.empty
~f:build_of_block ~f:build_of_block
let find_rbp_pop_set cfg entry =
  (** Computes the BStd.Tid.Set.t of the definitions actually "popping" %rbp,
      ie. the definitions upon which %rbp must go back to RbpUndef.

      The heuristic selects the definitions that
      i) have the shape `rbp = F(mem, rsp)` (supposedly, %rbp is reloaded from
      the stack); and
      ii) are the last of this kind in the subroutine's CFG, ie. the DFS
      started from [entry] reported no other definition matching (i) below
      them.

      When the `no_rbp_undef` setting is on, the returned set is empty. *)

  let reads_stack_through_rsp expr =
    (* The registers mapped from the free variables must form a set of
       exactly two elements whose `Some` element is %rsp, and one of the
       free variables must be named "mem". *)
    let vars = BStd.Exp.free_vars expr in
    let regs = Regs.X86_64.map_varset vars in
    if Regs.DwRegOptSet.cardinal regs <> 2 then
      false
    else begin
      let some_reg =
        regs
        |> Regs.DwRegOptSet.filter (function None -> false | Some _ -> true)
        |> Regs.DwRegOptSet.choose
      in
      let reads_mem =
        BStd.Var.Set.exists ~f:(fun v -> BStd.Var.name v = "mem") vars in
      (match some_reg, reads_mem with
       | Some dw_reg, true when dw_reg = Regs.X86_64.rsp -> true
       | _ -> false)
    end
  in

  let def_is_rbp_pop def =
    (* %rbp must be the assigned register, and the assigned expression must
       read the stack through %rsp. *)
    let rhs_is_pop = reads_stack_through_rsp (BStd.Def.rhs def) in
    (match Regs.X86_64.of_var (BStd.Def.lhs def) with
     | Some reg when reg = Regs.X86_64.rbp -> rhs_is_pop
     | _ -> false)
  in

  let block_find_rbp_pop block =
    (* Tid of the last definition of the block popping %rbp, if any *)
    BStd.Seq.fold (BStd.Blk.elts block)
      ~init:None
      ~f:(fun latest elt ->
          match elt with
          | `Def def when def_is_rbp_pop def -> Some (BStd.Term.tid def)
          | _ -> latest)
  in

  let rec block_dfs node visited =
    (* DFS on the CFG, returning (pops found, whether a pop was reported,
       visited blocks). A block is searched for pops only when none of its
       successors reported one, which rules out non-final pops. *)
    let block = CFG.Node.label node in
    if BStd.Blk.Set.mem visited block then
      (* Already visited: contribute no tid, but report a pop, so that the
         caller does not search its own block *)
      (BStd.Tid.Set.empty, true, visited)
    else begin
      let visited = BStd.Blk.Set.add visited block in
      let pops_below, found_below, visited =
        BStd.Seq.fold (CFG.Node.succs node cfg)
          ~init:(BStd.Tid.Set.empty, false, visited)
          ~f:(fun (acc_pops, acc_found, seen) child ->
              let child_pops, child_found, seen = block_dfs child seen in
              (BStd.Tid.Set.union acc_pops child_pops,
               acc_found || child_found,
               seen))
      in
      if found_below then
        (pops_below, true, visited)
      else
        (* No rbp pop below: seek one in this very block *)
        (match block_find_rbp_pop block with
         | None -> (pops_below, false, visited)
         | Some tid -> (BStd.Tid.Set.add pops_below tid, true, visited))
    end
  in

  if __settings.no_rbp_undef then
    BStd.Tid.Set.empty
  else begin
    let pop_set, _, _ = block_dfs entry BStd.Blk.Set.empty in
    pop_set
  end
let interpret_var_expr c_var offset expr = BStd.Bil.( let interpret_var_expr c_var offset expr = BStd.Bil.(
let closed_form = BStd.Exp.substitute let closed_form = BStd.Exp.substitute
(var c_var) (var c_var)
@ -212,13 +353,14 @@ let is_single_free_reg expr =
) )
let process_def (local_state: block_local_state) def (cur_reg: reg_pos) let process_def (local_state: block_local_state) def (cur_reg: reg_pos)
allow_rbp
: (reg_pos option * block_local_state) = : (reg_pos option * block_local_state) =
let lose_track = Some CfaLostTrack in let lose_track = Some CfaLostTrack in
let cur_cfa, cur_rbp = cur_reg in let cur_cfa, cur_rbp = cur_reg in
let out_cfa = let out_cfa =
(match cur_cfa, Regs.X86_64.of_var (BStd.Def.lhs def) with (match cur_cfa, Regs.X86_64.of_var (BStd.Def.lhs def), allow_rbp with
| RspOffset(cur_offset), Some reg when reg = Regs.X86_64.rsp -> | RspOffset(cur_offset), Some reg, _ when reg = Regs.X86_64.rsp ->
let exp = BStd.Def.rhs def in let exp = BStd.Def.rhs def in
(match is_single_free_reg exp with (match is_single_free_reg exp with
| Some (bil_var, dw_var) when dw_var = Regs.X86_64.rsp -> | Some (bil_var, dw_var) when dw_var = Regs.X86_64.rsp ->
@ -230,7 +372,7 @@ let process_def (local_state: block_local_state) def (cur_reg: reg_pos)
) )
| _ -> lose_track | _ -> lose_track
) )
| RspOffset(cur_offset), Some reg when reg = Regs.X86_64.rbp -> | RspOffset(cur_offset), Some reg, true when reg = Regs.X86_64.rbp ->
(* We have CFA=rsp+k and a line %rbp <- [expr]. (* We have CFA=rsp+k and a line %rbp <- [expr].
Might be a %rbp <- %rsp *) Might be a %rbp <- %rsp *)
let exp = BStd.Def.rhs def in let exp = BStd.Def.rhs def in
@ -251,7 +393,7 @@ let process_def (local_state: block_local_state) def (cur_reg: reg_pos)
) )
| _ -> None | _ -> None
) )
| RbpOffset(cur_offset), Some reg when reg = Regs.X86_64.rbp -> | RbpOffset(cur_offset), Some reg, true when reg = Regs.X86_64.rbp ->
(* Assume we are overwriting %rbp with something — we must revert to (* Assume we are overwriting %rbp with something — we must revert to
some rsp-based indexing *) some rsp-based indexing *)
(* FIXME don't assume the rsp offset will always be 8, find a smart way (* FIXME don't assume the rsp offset will always be 8, find a smart way
@ -260,6 +402,7 @@ let process_def (local_state: block_local_state) def (cur_reg: reg_pos)
value is read from the stack. value is read from the stack.
*) *)
Some (RspOffset(Int64.of_int 16)) Some (RspOffset(Int64.of_int 16))
| RbpOffset _, _, false -> assert false
| _ -> None | _ -> None
) in ) in
@ -288,25 +431,6 @@ let process_def (local_state: block_local_state) def (cur_reg: reg_pos)
(has_rbp_var || has_intermed_rbp_var)) (has_rbp_var || has_intermed_rbp_var))
in in
let is_pop_expr expr =
let free_vars = BStd.Exp.free_vars expr in
let free_x86_regs = Regs.X86_64.map_varset free_vars in
(match Regs.DwRegOptSet.cardinal free_x86_regs with
| 2 ->
let reg = free_x86_regs
|> Regs.DwRegOptSet.filter
(fun x -> match x with None -> false | Some _ -> true)
|> Regs.DwRegOptSet.choose in
let has_mem_var = BStd.Var.Set.exists
~f:(fun x -> BStd.Var.name x = "mem")
free_vars in
(match reg, has_mem_var with
| Some dw_var, true when dw_var = Regs.X86_64.rsp -> true
| _ -> false)
| _ -> false
)
in
let is_rbp_expr expr = let is_rbp_expr expr =
let free_vars = BStd.Exp.free_vars expr in let free_vars = BStd.Exp.free_vars expr in
let free_x86_regs = Regs.X86_64.map_varset free_vars in let free_x86_regs = Regs.X86_64.map_varset free_vars in
@ -369,13 +493,14 @@ let process_def (local_state: block_local_state) def (cur_reg: reg_pos)
new_rbp, cur_state new_rbp, cur_state
| RbpCfaOffset offs -> | RbpCfaOffset offs ->
(* We go back to RbpUndef when encountering something like a `pop rbp`, (* We go back to RbpUndef if the current def is in the rbp_pop_set --
that is, RBP <- f(RSP, mem) *) see `find_rbp_pop_set` *)
(match Regs.X86_64.of_var (BStd.Def.lhs def),
is_pop_expr @@ BStd.Def.rhs def with (match BStd.Tid.Set.mem (local_state.rbp_pop_set) @@ BStd.Term.tid def
| Some reg, true when reg = Regs.X86_64.rbp -> with
| true ->
Some RbpUndef, local_state Some RbpUndef, local_state
| _ -> None, local_state | false -> None, local_state
) )
) )
in in
@ -404,7 +529,8 @@ let process_jmp jmp (cur_reg: reg_pos)
| _ -> None | _ -> None
let process_blk let process_blk
next_instr_graph (block_init: reg_pos) blk : (reg_changes_fde * reg_pos) = next_instr_graph rbp_pop_set allow_rbp (block_init: reg_pos) blk
: (reg_changes_fde * reg_pos) =
(** Extracts the registers (CFA+RBP) changes of a block. *) (** Extracts the registers (CFA+RBP) changes of a block. *)
let apply_offset cur_addr_opt ((accu:reg_changes_fde), cur_reg, local_state) let apply_offset cur_addr_opt ((accu:reg_changes_fde), cur_reg, local_state)
@ -424,7 +550,8 @@ let process_blk
let fold_elt (accu, cur_reg, cur_local_state) elt = match elt with let fold_elt (accu, cur_reg, cur_local_state) elt = match elt with
| `Def(def) -> | `Def(def) ->
let new_offset, new_state = process_def cur_local_state def cur_reg in let new_offset, new_state =
process_def cur_local_state def cur_reg allow_rbp in
apply_offset apply_offset
(opt_addr_of def) (accu, cur_reg, new_state) new_offset (opt_addr_of def) (accu, cur_reg, new_state) new_offset
| `Jmp(jmp) -> | `Jmp(jmp) ->
@ -442,7 +569,8 @@ let process_blk
) in ) in
let empty_local_state = { let empty_local_state = {
rbp_vars = BStd.Var.Set.empty rbp_vars = BStd.Var.Set.empty;
rbp_pop_set = rbp_pop_set
} in } in
let elts_seq = BStd.Blk.elts blk in let elts_seq = BStd.Blk.elts blk in
let out_reg, end_reg, _ = BStd.Seq.fold elts_seq let out_reg, end_reg, _ = BStd.Seq.fold elts_seq
@ -525,18 +653,21 @@ let cleanup_fde (fde_changes: reg_changes_fde) : reg_changes_fde =
match AddrMap.fold fold_one fde_changes (AddrMap.empty, None, false) with match AddrMap.fold fold_one fde_changes (AddrMap.empty, None, false) with
| out, _, _ -> out | out, _, _ -> out
let process_sub sub : subroutine_cfa_data = let process_sub sub next_instr_graph : subroutine_cfa_data =
(** Extracts the `cfa_changes_fde` of a subroutine *) (** Extracts the `cfa_changes_fde` of a subroutine *)
let cfg = BStd.Sub.to_cfg sub in let cfg = BStd.Sub.to_cfg sub in
let next_instr_graph = build_next_instr cfg in
let first_addr = int64_addr_of sub in let first_addr = int64_addr_of sub in
let last_addr = find_last_addr sub in let last_addr = find_last_addr sub in
let initial_cfa_rsp_offset = Int64.of_int 8 in let initial_cfa_rsp_offset = Int64.of_int 8 in
let entry_blk = get_entry_blk cfg in
let rbp_pop_set = find_rbp_pop_set cfg entry_blk in
let rec dfs_process let rec dfs_process
allow_rbp
(sub_changes: (reg_changes_fde * reg_pos) TIdMap.t) (sub_changes: (reg_changes_fde * reg_pos) TIdMap.t)
node node
(entry_offset: reg_pos) = (entry_offset: reg_pos) =
@ -549,23 +680,56 @@ let process_sub sub : subroutine_cfa_data =
| None -> | None ->
(* Not yet visited: compute the changes *) (* Not yet visited: compute the changes *)
let cur_blk_changes, end_reg = let cur_blk_changes, end_reg =
process_blk next_instr_graph entry_offset cur_blk in process_blk next_instr_graph rbp_pop_set
allow_rbp entry_offset cur_blk in
let n_sub_changes = let n_sub_changes =
TIdMap.add tid (cur_blk_changes, entry_offset) sub_changes in TIdMap.add tid (cur_blk_changes, entry_offset) sub_changes in
BStd.Seq.fold (CFG.Node.succs node cfg) BStd.Seq.fold (CFG.Node.succs node cfg)
~f:(fun accu child -> dfs_process accu child end_reg) ~f:(fun accu child -> dfs_process allow_rbp accu child end_reg)
~init:n_sub_changes ~init:n_sub_changes
| Some (_, former_entry_offset) -> | Some (_, former_entry_offset) ->
(* Already visited: check that entry values are matching *) (* Already visited: check that entry values are matching *)
if entry_offset <> former_entry_offset then if entry_offset <> former_entry_offset then (
if allow_rbp then
Format.eprintf "Found inconsistency (0x%Lx): %a -- %a@."
(int64_addr_of cur_blk)
pp_reg_pos entry_offset pp_reg_pos former_entry_offset ;
raise (Inconsistent tid) raise (Inconsistent tid)
)
else else
sub_changes sub_changes
in in
let entry_blk = get_entry_blk cfg in let with_rbp_if_needed initial_offset =
(* Tries first without allowing CFA=rbp+k, then allowing it if the first
result was either inconsistent or lost track *)
let not_losing_track synth_result =
let lost_track = TIdMap.exists
(fun _ (_, (cfa_pos, _)) -> match cfa_pos with
| CfaLostTrack -> true
| _ -> false) synth_result
in
(match lost_track with
| true -> None
| false -> Some synth_result)
in
let without_rbp =
(try
dfs_process false TIdMap.empty entry_blk initial_offset
|> not_losing_track
with Inconsistent _ -> None
)
in
(match without_rbp with
| Some correct_res -> correct_res
| None ->
dfs_process true TIdMap.empty entry_blk initial_offset)
in
let initial_offset = (RspOffset initial_cfa_rsp_offset, RbpUndef) in let initial_offset = (RspOffset initial_cfa_rsp_offset, RbpUndef) in
let changes_map = dfs_process TIdMap.empty entry_blk initial_offset in (* Try first without rbp, then with rbp upon failure *)
let changes_map = with_rbp_if_needed initial_offset in
let merged_changes = TIdMap.fold let merged_changes = TIdMap.fold
(fun _ (cfa_changes, _) accu -> AddrMap.union (fun _ v1 v2 -> (fun _ (cfa_changes, _) accu -> AddrMap.union (fun _ v1 v2 ->
@ -587,12 +751,16 @@ let process_sub sub : subroutine_cfa_data =
output output
let of_prog prog : subroutine_cfa_map = let of_prog prog next_instr_graph : subroutine_cfa_map =
(** Extracts the `cfa_changes` of a program *) (** Extracts the `cfa_changes` of a program *)
let fold_step accu sub = let fold_step accu sub =
(try (try
let subroutine_data = process_sub sub in (match is_ghost_sub sub with
StrMap.add (BStd.Sub.name sub) subroutine_data accu | true -> accu
| false ->
let subroutine_data = process_sub sub next_instr_graph in
StrMap.add (BStd.Sub.name sub) subroutine_data accu
)
with with
| InvalidSub -> accu | InvalidSub -> accu
| Inconsistent tid -> | Inconsistent tid ->
@ -606,10 +774,34 @@ let of_prog prog : subroutine_cfa_map =
~init:StrMap.empty ~init:StrMap.empty
~f:fold_step ~f:fold_step
let of_proj proj : subroutine_cfa_map = let build_sub_ranges prog: (memory_address) AddrMap.t =
(** Builds a map mapping the first address of each subroutine to its last
address. This map can be interpreted as a list of address ranges with
easy fast access to a member (cf Map.S.find_first) *)
let fold_subroutine accu sub =
(match is_ghost_sub sub with
| true -> accu
| false ->
let first_addr = int64_addr_of sub in
let last_addr = find_last_addr sub in
AddrMap.add first_addr (last_addr) accu
)
in
let subroutines = BStd.Term.enum BStd.sub_t prog in
BStd.Seq.fold subroutines
~init:AddrMap.empty
~f:fold_subroutine
let of_proj no_rbp_undef proj : subroutine_cfa_map =
(** Extracts the `cfa_changes` of a project *) (** Extracts the `cfa_changes` of a project *)
__settings.no_rbp_undef <- no_rbp_undef ;
let prog = BStd.Project.program proj in let prog = BStd.Project.program proj in
of_prog prog let sub_ranges = build_sub_ranges prog in
let next_instr_graph =
build_next_instr sub_ranges (BStd.Project.disasm proj) in
of_prog prog next_instr_graph
let clean_lost_track_subs pre_dwarf : subroutine_cfa_map = let clean_lost_track_subs pre_dwarf : subroutine_cfa_map =
(** Removes the subroutines on which we lost track from [pre_dwarf] *) (** Removes the subroutines on which we lost track from [pre_dwarf] *)

1
csmith/.gitignore vendored Normal file
View file

@ -0,0 +1 @@
0[0-9]/

View file

@ -32,10 +32,105 @@ def parse_fde_head(line):
return pc_beg, pc_end return pc_beg, pc_end
def detect_clang_flat_to_pyramid(rows):
    """Artificially repair clang's flat callee-saved saving into a gcc
    pyramid-like shape.

    Eg., clang will generate

        LOC              CFA      rbx   rbp   ra
        0000000000007180 rsp+8    u     u     c-8
        0000000000007181 rsp+16   u     u     c-8
        0000000000007182 rsp+24   u     u     c-8
        0000000000007189 rsp+7632 c-24  c-16  c-8

    while we would wish to have

        LOC              CFA      rbx   rbp   ra
        0000000000007180 rsp+8    u     u     c-8
        0000000000007181 rsp+16   u     c-16  c-8
        0000000000007182 rsp+24   c-24  c-16  c-8
        0000000000007189 rsp+7632 c-24  c-16  c-8

    The repair strategy is as follows:
    - ignore the implicit first row (the analysis is tried starting at row 1,
      then at row 2);
    - find the first k rows such that only the CFA changes from row to row,
      with a CFA delta of +8 and an address delta of at most 2 (a push is at
      most 2 bytes long);
    - for every callee-saved register R that is defined in the row following
      them with rule c-x, while rsp+x is the CFA of row k' <= k, redefine R
      as c-x in rows [k'; k].

    `rows` is a list of dicts, each with at least the keys "LOC" (int) and
    "CFA" (str), plus one key per register column. The rows are modified in
    place; the list itself is returned, whether or not a repair took place.
    """
    not_callee_saved = ("LOC", "CFA", "ra")

    def try_starting_at(start_row):
        if len(rows) < start_row + 1:  # Ensure we have at least the start row
            return rows, False

        # The row we start from (not unconditionally row 1) must have an
        # rsp-based CFA, since its offset is parsed right below.
        if rows[start_row]["CFA"][:4] != "rsp+":
            return rows, False
        first_cfa = int(rows[start_row]["CFA"][4:])
        prev_cfa = first_cfa
        prev_loc = rows[start_row]["LOC"]

        flatness_row_id = start_row
        for row in rows[start_row + 1 :]:
            # A callee-saved register being defined ends the flat region.
            # (This must leave the loop over the rows, not merely a loop over
            # the registers of the row.)
            if any(
                reg not in not_callee_saved and rule != "u"
                for reg, rule in row.items()
            ):
                break
            cfa = row["CFA"]
            if cfa[:4] != "rsp+":
                break
            if int(cfa[4:]) != prev_cfa + 8:
                break
            prev_cfa += 8
            loc = row["LOC"]
            if loc > prev_loc + 2:
                break
            prev_loc = loc
            flatness_row_id += 1
        # Step onto the row that ended the flat region: the one expected to
        # define all the callee-saved registers at once.
        flatness_row_id += 1
        if flatness_row_id - start_row <= 1 or flatness_row_id >= len(rows):
            return rows, False  # nothing to change
        flatness_row = rows[flatness_row_id]

        # Validate every register before touching any row, so that a failed
        # attempt leaves `rows` untouched.
        reg_changes = {}
        for reg, rule in flatness_row.items():
            if reg in not_callee_saved:
                continue
            if rule[:2] != "c-":
                return rows, False  # Not a flat_to_pyramid after all
            rule_offset_rectified = int(rule[2:]) - first_cfa
            if rule_offset_rectified % 8 != 0:
                return rows, False
            row_change_id = rule_offset_rectified // 8 + start_row
            if row_change_id < start_row:
                # The slot lies outside of the analysed pushes; slicing from
                # there would rewrite earlier rows (or wrap around when
                # negative).
                return rows, False
            reg_changes[reg] = (row_change_id, rule)

        for reg, (change_from, rule) in reg_changes.items():
            for row in rows[change_from:flatness_row_id]:
                row[reg] = rule

        return rows, True

    for start_row in [1, 2]:
        mod_rows, modified = try_starting_at(start_row)
        if modified:
            return mod_rows

    return rows
def parse_fde_row(line, reg_cols):
vals = list(map(lambda x: x.strip(), line.split())) vals = list(map(lambda x: x.strip(), line.split()))
assert len(vals) > ra_col # ra is the rightmost useful column assert len(vals) > reg_cols["ra"] # ra is the rightmost useful column
out = {"LOC": int(vals[0], 16), "CFA": vals[1], "ra": vals[ra_col]}
out = {"LOC": int(vals[0], 16), "CFA": vals[1]}
for reg in reg_cols:
col_id = reg_cols[reg]
out[reg] = vals[col_id]
if "rbp" not in out:
out["rbp"] = "u"
return out return out
@ -47,7 +142,14 @@ def clean_rows(rows):
out_rows = [rows[0]] out_rows = [rows[0]]
for row in rows[1:]: for row in rows[1:]:
if not row == out_rows[-1]: if not row == out_rows[-1]:
out_rows.append(row) filtered_row = row
filter_out = []
for reg in filtered_row:
if reg not in ["LOC", "CFA", "rbp", "ra"]:
filter_out.append(reg)
for reg in filter_out:
filtered_row.pop(reg)
out_rows.append(filtered_row)
return out_rows return out_rows
@ -58,14 +160,28 @@ def parse_fde(lines):
except NotFDE: except NotFDE:
return return
rows = [{"LOC": 0, "CFA": "rsp+8", "ra": "c-8"}] # Implicit CIE row rows = [{"LOC": 0, "CFA": "rsp+8", "rbp": "u", "ra": "c-8"}] # Implicit CIE row
if len(lines) >= 2: # Has content if len(lines) >= 2: # Has content
head_row = list(map(lambda x: x.strip(), lines[1].split())) head_row = list(map(lambda x: x.strip(), lines[1].split()))
ra_col = head_row.index("ra") reg_cols = {}
for pos, reg in enumerate(head_row):
if reg not in ["LOC", "CFA"]:
reg_cols[reg] = pos
for line in lines[2:]: for line in lines[2:]:
rows.append(parse_fde_row(line, ra_col)) rows.append(parse_fde_row(line, reg_cols))
# if pc_beg == 0x1160:
# print("===== FDE: {}..{} ====".format(hex(pc_beg), hex(pc_end)))
# print("BEFORE:")
# for row in rows:
# print(row)
rows = detect_clang_flat_to_pyramid(rows)
# if pc_beg == 0x1160:
# print("AFTER:")
# for row in rows:
# print(row)
return {"beg": pc_beg, "end": pc_end, "rows": clean_rows(rows)} return {"beg": pc_beg, "end": pc_end, "rows": clean_rows(rows)}
@ -151,7 +267,7 @@ def dump_light_fdes(fdes):
def match_fde(orig, synth): def match_fde(orig, synth):
def vals_of(row): def vals_of(row):
return {"CFA": row["CFA"], "ra": row["ra"]} return {"CFA": row["CFA"], "ra": row["ra"], "rbp": row["rbp"]}
def loc_of(rch): def loc_of(rch):
return rch[1]["LOC"] return rch[1]["LOC"]
@ -165,7 +281,7 @@ def match_fde(orig, synth):
rowchanges.append((typ, row)) rowchanges.append((typ, row))
rowchanges.sort(key=loc_of) rowchanges.sort(key=loc_of)
matching = True mismatch_count = 0
for rowid, rowch in enumerate(rowchanges): for rowid, rowch in enumerate(rowchanges):
typ, row = rowch[0], rowch[1] typ, row = rowch[0], rowch[1]
cur_val[typ] = vals_of(row) cur_val[typ] = vals_of(row)
@ -180,12 +296,17 @@ def match_fde(orig, synth):
hex(row["LOC"]), cur_val[0], cur_val[1] hex(row["LOC"]), cur_val[0], cur_val[1]
) )
) )
matching = False mismatch_count += 1
return matching return mismatch_count
def parse_sym_table(handle): def parse_sym_table(handle):
def readint(x):
if x.startswith("0x"):
return int(x[2:], 16)
return int(x)
out_map = {} out_map = {}
for line in handle: for line in handle:
line = line.strip() line = line.strip()
@ -194,7 +315,7 @@ def parse_sym_table(handle):
spl = list(map(lambda x: x.strip(), line.split())) spl = list(map(lambda x: x.strip(), line.split()))
loc = int(spl[1], 16) loc = int(spl[1], 16)
size = int(spl[2]) size = readint(spl[2])
name = spl[7] name = spl[7]
out_map[name] = (loc, size) out_map[name] = (loc, size)
return out_map return out_map
@ -227,15 +348,30 @@ def main():
mismatches = 0 mismatches = 0
for (orig, synth) in matched: for (orig, synth) in matched:
if not match_fde(orig, synth): mismatches += match_fde(orig, synth)
mismatches += 1
reports = [] reports = []
if mismatches > 0: if mismatches > 0:
reports.append("{} mismatches".format(mismatches)) reports.append("{} mismatches".format(mismatches))
if unmatched_orig: if unmatched_orig:
reports.append("{} unmatched (orig)".format(len(unmatched_orig))) worth_reporting = False
for unmatched in unmatched_orig:
if len(unmatched["rows"]) > 1:
worth_reporting = True
break
if worth_reporting:
unmatched_addrs = [fde_pos(fde) for fde in unmatched_orig]
reports.append(
"{} unmatched (orig): {}".format(
len(unmatched_orig), ", ".join(unmatched_addrs)
)
)
if unmatched_synth: if unmatched_synth:
reports.append("{} unmatched (synth)".format(len(unmatched_synth))) unmatched_addrs = [fde_pos(fde) for fde in unmatched_synth]
reports.append(
"{} unmatched (synth): {}".format(
len(unmatched_synth), ", ".join(unmatched_addrs)
)
)
if reports: if reports:
print("{}: {}".format(test_name, "; ".join(reports))) print("{}: {}".format(test_name, "; ".join(reports)))

View file

@ -1,6 +1,12 @@
#!/bin/bash #!/bin/bash
USAGE="$0 output_dir nb_tests" USAGE="$0 output_dir nb_tests
You may also set COMPILE_CMD to eg. 'gcc -O2' if you want to override the
default command."
if [ -z "$COMPILE_CMD" ] ; then
COMPILE_CMD='gcc -O2'
fi
if [ "$#" -lt 2 ] ; then if [ "$#" -lt 2 ] ; then
>&2 echo -e "Missing argument(s). Usage:\n$USAGE" >&2 echo -e "Missing argument(s). Usage:\n$USAGE"
@ -28,11 +34,11 @@ for _num in $(seq 1 $NB_TESTS); do
csmith > "$path.c" csmith > "$path.c"
sed -i 's/^static \(.* func_\)/\1/g' "$path.c" sed -i 's/^static \(.* func_\)/\1/g' "$path.c"
echo -ne "\r>>> $num.bin " echo -ne "\r>>> $num.bin "
gcc -O2 -I/usr/include/csmith-2.3.0/ -w "$path.c" -o "$path.orig.bin" $COMPILE_CMD -I/usr/include/csmith-2.3.0/ -w "$path.c" -o "$path.orig.bin"
objcopy --remove-section '.eh_frame' --remove-section '.eh_frame_hdr' \ objcopy --remove-section '.eh_frame' --remove-section '.eh_frame_hdr' \
"$path.orig.bin" "$path.bin" "$path.orig.bin" "$path.bin"
echo -ne "\r>>> $num.eh.bin " echo -ne "\r>>> $num.eh.bin "
../synthesize_dwarf.sh "$path.bin" "$path.eh.bin" BAP_ARGS='--dwarfsynth-no-rbp-undef' ../synthesize_dwarf.sh "$path.bin" "$path.eh.bin"
if [ "$check_gen_eh_frame" -gt 0 ] ; then if [ "$check_gen_eh_frame" -gt 0 ] ; then
./check_generated_eh_frame.sh "$path" ./check_generated_eh_frame.sh "$path"

1
curiosities/.gitignore vendored Normal file
View file

@ -0,0 +1 @@
*.bin

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -20,8 +20,18 @@ module Cmdline = struct
~default:"tmp.marshal" ~default:"tmp.marshal"
) )
(* Boolean command-line parameter named "no-rbp-undef", declared as a flag
   and defaulting to false. Its value is read when the configuration is
   ready and handed to `main` as `~no_rbp_undef`. *)
let no_rbp_undef = Cnf.(
param (bool) "no-rbp-undef"
~doc:("Do not unset %rbp after it has been set once in a FDE. "
^"This mimics gcc eh_frame for ease of validation.")
~as_flag:true
~default:false
)
let () = Cnf.( let () = Cnf.(
when_ready ((fun {get=(!!)} -> when_ready ((fun {get=(!!)} ->
Bap.Std.Project.register_pass' (main !!outfile))) Bap.Std.Project.register_pass' (main
~no_rbp_undef:!!no_rbp_undef
!!outfile )))
) )
end end

View file

@ -35,7 +35,9 @@ function find_ml_dwarf_write {
} }
function bap_synth { function bap_synth {
bap "$INPUT_FILE" --no-byteweight -p dwarfsynth --dwarfsynth-output "$TMP_DIR/marshal" \ bap "$INPUT_FILE" \
--no-byteweight -p dwarfsynth \
--dwarfsynth-output "$TMP_DIR/marshal" $BAP_ARGS \
> /dev/null > /dev/null
return $? return $?
} }

1
test/.gitignore vendored Normal file
View file

@ -0,0 +1 @@
*.bin

19
test/inline_asm.c Normal file
View file

@ -0,0 +1,19 @@
#include <stdio.h>
#include <stdlib.h>
int main(int argc, char** argv) {
if(argc < 2) {
fprintf(stderr, "Missing argument: loop upper bound.\n");
exit(1);
}
int upper_bound = atoi(argv[1]);
int count = 0;
for(int i=0; i < upper_bound; ++i) {
__asm__("sub $8, %rsp; movq $42, (%rsp)");
count++;
__asm__("add $8, %rsp");
}
printf("%d\n", count);
return 0;
}

28
test/without_rbp_col.c Normal file
View file

@ -0,0 +1,28 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
/* For each cz in [1, z), declares a variable-length array of cz ints and
 * writes 8 into its middle element. Does nothing when z <= 1.
 * NOTE(review): the variable-length array is presumably there to make the
 * compiler address the frame through %rbp -- confirm. */
void rbp_bump_2(int z) {
for(int cz=1; cz < z; cz++) {
int x[cz];
x[cz / 2] = 8;
}
}
void rbp_bump_1(int y) {
for(int cy=1; cy < y; cy++) {
int x[cy];
x[cy / 2] = 8;
rbp_bump_2(x[cy/2]);
}
}
int main(int argc, char** argv) {
if(argc < 2) {
fprintf(stderr, "Missing argument: n\n");
return 1;
}
int num = atoi(argv[1]);
rbp_bump_1(num);
return 0;
}