dwarf-synthesis/DwarfSynth/Simplest.ml

625 lines
20 KiB
OCaml

open Std
module CFG = BStd.Graphs.Ir
type memory_offset = int64
type memory_address = int64
module AddrMap = Map.Make(Int64)
module AddrSet = Set.Make(Int64)
type cfa_pos =
RspOffset of memory_offset
| RbpOffset of memory_offset
| CfaLostTrack
type rbp_pos =
| RbpUndef
| RbpCfaOffset of memory_offset
type reg_pos = cfa_pos * rbp_pos
type reg_changes_fde = reg_pos AddrMap.t
type subroutine_cfa_data = {
reg_changes_fde: reg_changes_fde;
beg_pos: memory_address;
end_pos: memory_address;
}
type block_local_state = {
rbp_vars: BStd.Var.Set.t
}
module StrMap = Map.Make(String)
type subroutine_cfa_map = subroutine_cfa_data StrMap.t
module TIdMap = Map.Make(BStd.Tid)
exception InvalidSub
exception UnexpectedRbpSet
let pp_cfa_pos ppx = function
| RspOffset off -> Format.fprintf ppx "RSP + (%s)" (Int64.to_string off)
| RbpOffset off -> Format.fprintf ppx "RBP + (%s)" (Int64.to_string off)
| CfaLostTrack -> Format.fprintf ppx "??@."
let pp_int64_hex ppx number =
let mask_short = Int64.(pred (shift_left one 16)) in
let pp_short number =
Format.fprintf ppx "%04x" Int64.(to_int (logand number mask_short))
in
List.iter pp_short @@ List.map (fun x ->
Int64.(shift_right number (16*x))) [3;2;1;0]
let pp_cfa_changes_fde ppx cfa_changes = AddrMap.iter
(fun addr change ->
Format.fprintf ppx "%a: %a@."
pp_int64_hex addr
pp_cfa_pos change)
cfa_changes
let pp_cfa_changes ppx =
StrMap.iter (fun fde_name entry ->
Format.fprintf ppx "%s@\n====@\n@\n%a@." fde_name
pp_cfa_changes_fde entry)
let pp_option_of sub_pp ppx = function
| None -> Format.fprintf ppx "None"
| Some x -> Format.fprintf ppx "Some %a" sub_pp x
let opt_addr_of term =
(** Get the address of a term as an option, if it has one*)
BStd.Term.get_attr term BStd.address
let addr_of term =
(** Get the address of a term *)
match opt_addr_of term with
| None -> assert false
| Some addr -> addr
let opt_addr_of_blk_elt = function
| `Def def -> opt_addr_of def
| `Jmp jmp -> opt_addr_of jmp
| `Phi phi -> opt_addr_of phi
let entrypoint_address blk =
(** Find the first instruction address in the current block.
Return None if no instruction has address. *)
let fold_one accu cur_elt = match accu, opt_addr_of_blk_elt cur_elt with
| None, None -> None
| None, Some x -> Some x
| _, _ -> accu
in
BStd.Seq.fold (BStd.Blk.elts blk)
~init:None
~f:fold_one
let to_int64_addr addr =
BStd.Word.to_int64_exn addr
let int64_addr_of x = to_int64_addr @@ addr_of x
let map_option f = function
| None -> None
| Some x -> Some (f x)
let build_next_instr graph =
(** Build a map of memory_address -> AddrSet.t holding, for each address, the
set of instructions coming right after the instruction at given address.
There might be multiple such addresses, if the current instruction is at
a point of branching. *)
let addresses_in_block blk =
(** Set of addresses present in the block *)
BStd.Seq.fold (BStd.Blk.elts blk)
~init:AddrSet.empty
~f:(fun accu elt ->
let addr = opt_addr_of_blk_elt elt in
match addr with
| None -> accu
| Some x ->
(try
AddrSet.add (BStd.Word.to_int64_exn x) accu
with _ -> accu)
)
in
let node_successors_addr (nd: CFG.node) : AddrSet.t =
let rec do_find_succ accu nd =
let fold_one accu c_node =
match entrypoint_address (CFG.Node.label c_node) with
| Some addr ->
(try
AddrSet.add (BStd.Word.to_int64_exn addr) accu
with _ -> accu)
| None -> do_find_succ accu c_node
in
let succ = CFG.Node.succs nd graph in
BStd.Seq.fold succ
~init:accu
~f:fold_one
in
do_find_succ AddrSet.empty nd
in
let build_of_block accu_map node =
let blk = CFG.Node.label node in
let node_successors = node_successors_addr node in
let instr_addresses = AddrSet.elements @@ addresses_in_block blk in
let rec accumulate_mappings mappings addr_list = function
| None -> mappings
| Some (instr, instr_seq) as cur_instr ->
let instr_addr = opt_addr_of_blk_elt instr in
match (map_option to_int64_addr instr_addr), addr_list with
| None, _ ->
accumulate_mappings mappings addr_list @@ BStd.Seq.next instr_seq
| Some cur_addr, next_addr::t when cur_addr >= next_addr ->
accumulate_mappings mappings t cur_instr
| Some cur_addr, next_addr::_ ->
let n_mappings = AddrMap.add
cur_addr (AddrSet.singleton next_addr) mappings in
accumulate_mappings n_mappings addr_list @@ BStd.Seq.next instr_seq
| Some cur_addr, [] ->
let n_mappings = AddrMap.add
cur_addr node_successors mappings in
accumulate_mappings n_mappings addr_list @@ BStd.Seq.next instr_seq
in
accumulate_mappings
accu_map
instr_addresses
(BStd.Seq.next @@ BStd.Blk.elts blk)
in
BStd.Seq.fold (CFG.nodes graph)
~init:AddrMap.empty
~f:build_of_block
let interpret_var_expr c_var offset expr = BStd.Bil.(
let closed_form = BStd.Exp.substitute
(var c_var)
(int (BStd.Word.of_int64 (Int64.neg offset)))
expr
in
let res = BStd.Exp.eval closed_form in
match res with
| Imm value ->
Some (Int64.neg @@ BStd.Word.to_int64_exn @@ BStd.Word.signed value)
| _ -> None
)
let is_single_free_reg expr =
(** Detects whether `expr` contains a single free variable which is a machine
register, and if so, extracts this register and returns a pair `(formal,
reg`) of its formal variable in the expression and the register.
Otherwise return None. *)
let free_vars = BStd.Exp.free_vars expr in
let free_x86_regs = Regs.X86_64.map_varset free_vars in
(match Regs.DwRegOptSet.cardinal free_x86_regs with
| 1 ->
let free_var = Regs.DwRegOptSet.choose free_x86_regs in
(match free_var with
| Some dw_var ->
let bil_var = (match BStd.Var.Set.choose free_vars with
| None -> assert false
| Some x -> x) in
Some (bil_var, dw_var)
| _ -> None
)
| _ -> None
)
let process_def (local_state: block_local_state) def (cur_reg: reg_pos)
: (reg_pos option * block_local_state) =
let lose_track = Some CfaLostTrack in
let cur_cfa, cur_rbp = cur_reg in
let out_cfa =
(match cur_cfa, Regs.X86_64.of_var (BStd.Def.lhs def) with
| RspOffset(cur_offset), Some reg when reg = Regs.X86_64.rsp ->
let exp = BStd.Def.rhs def in
(match is_single_free_reg exp with
| Some (bil_var, dw_var) when dw_var = Regs.X86_64.rsp ->
let interpreted = interpret_var_expr bil_var cur_offset exp in
(match interpreted with
| None -> lose_track
| Some new_offset ->
Some (RspOffset(new_offset))
)
| _ -> lose_track
)
| RspOffset(cur_offset), Some reg when reg = Regs.X86_64.rbp ->
(* We have CFA=rsp+k and a line %rbp <- [expr].
Might be a %rbp <- %rsp *)
let exp = BStd.Def.rhs def in
(match is_single_free_reg exp with
| Some (bil_var, dw_var) when dw_var = Regs.X86_64.rsp ->
(* We have %rbp := F(%rsp) *)
(* FIXME we wish to have %rbp := %rsp. An ugly and non-robust test to
check that would be interpret F(0), expecting that F is at worst
affine - then a restult of 0 means that %rbp := %rsp + 0 *)
let interpreted = interpret_var_expr bil_var (Int64.zero) exp in
(match interpreted with
| Some offset when offset = Int64.zero ->
Some (RbpOffset(cur_offset))
| _ ->
(* Previous instruction was rsp-indexed, here we put something
weird in %rbp, let's keep indexing with rsp and do nothing *)
None
)
| _ -> None
)
| RbpOffset(cur_offset), Some reg when reg = Regs.X86_64.rbp ->
(* Assume we are overwriting %rbp with something — we must revert to
some rsp-based indexing *)
(* FIXME don't assume the rsp offset will always be 8, find a smart way
to figure this out.
We actually use offset 16 because the `pop` will occur after the
value is read from the stack.
*)
Some (RspOffset(Int64.of_int 16))
| _ -> None
) in
let is_rbp_save_expr expr local_state =
let free_vars = BStd.Exp.free_vars expr in
let card = BStd.Var.Set.length free_vars in
let has_mem_var = BStd.Var.Set.exists
~f:(fun x -> BStd.Var.name x = "mem")
free_vars in
let free_x86_regs = Regs.X86_64.map_varset free_vars in
let has_rsp_var = free_x86_regs
|> Regs.DwRegOptSet.exists
(fun x -> match x with
| Some x when x = Regs.X86_64.rsp -> true
| _ -> false) in
let has_rbp_var = free_x86_regs
|> Regs.DwRegOptSet.exists
(fun x -> match x with
| Some x when x = Regs.X86_64.rbp -> true
| _ -> false) in
let has_intermed_rbp_var = free_vars
|> BStd.Var.Set.inter local_state.rbp_vars
|> BStd.Var.Set.is_empty
|> not in
(card = 3 && has_mem_var && has_rsp_var &&
(has_rbp_var || has_intermed_rbp_var))
in
let is_pop_expr expr =
let free_vars = BStd.Exp.free_vars expr in
let free_x86_regs = Regs.X86_64.map_varset free_vars in
(match Regs.DwRegOptSet.cardinal free_x86_regs with
| 2 ->
let reg = free_x86_regs
|> Regs.DwRegOptSet.filter
(fun x -> match x with None -> false | Some _ -> true)
|> Regs.DwRegOptSet.choose in
let has_mem_var = BStd.Var.Set.exists
~f:(fun x -> BStd.Var.name x = "mem")
free_vars in
(match reg, has_mem_var with
| Some dw_var, true when dw_var = Regs.X86_64.rsp -> true
| _ -> false)
| _ -> false
)
in
let is_rbp_expr expr =
let free_vars = BStd.Exp.free_vars expr in
let free_x86_regs = Regs.X86_64.map_varset free_vars in
(match Regs.DwRegOptSet.cardinal free_x86_regs with
| 1 ->
let reg = Regs.DwRegOptSet.choose free_x86_regs in
(match reg with
| Some dwreg when dwreg = Regs.X86_64.rbp -> true
| _ -> false)
| _ -> false)
in
let gather_rbp_intermed_var def cur_state =
(* If `def` is `some intermed. var <- rbp`, add this information in the
local state *)
(match is_rbp_expr @@ BStd.Def.rhs def with
| true ->
let lhs_var = BStd.Def.lhs def in
if (BStd.Var.is_virtual lhs_var
&& BStd.Var.typ lhs_var = BStd.reg64_t) then
(
(* This `def` is actually of the type we want to store. *)
let n_rbp_vars = BStd.Var.Set.add cur_state.rbp_vars lhs_var in
{ cur_state with rbp_vars = n_rbp_vars }
)
else
cur_state
| false -> cur_state
)
in
let out_rbp, new_state =
(match cur_rbp with
| RbpUndef ->
let cur_state = gather_rbp_intermed_var def local_state in
(* We assume that an expression is saving %rbp on the stack at the
address %rip when the current def is an expression of the kind
`MEM <- F(MEM, %rip, v)` where `v` is either `%rbp` or some
intermediary variable holding `%rbp`.
This approach is sound when %rbp is saved using a `push`, but
probably wrong when saved using a `mov` on some stack-space allocated
previously (eg. for multiple registers saved at once).
It would be far better to actually read the position at which `v` is
saved, but this requires parsing the actual rhs expression, which is
not easily done: FIXME
*)
let new_rbp =
if (BStd.Var.name @@ BStd.Def.lhs def = "mem"
&& is_rbp_save_expr (BStd.Def.rhs def) cur_state)
then
(match cur_cfa with
| RspOffset off ->
Some (RbpCfaOffset (Int64.mul Int64.minus_one off))
| _ -> raise UnexpectedRbpSet
)
else
None
in
new_rbp, cur_state
| RbpCfaOffset offs ->
(* We go back to RbpUndef when encountering something like a `pop rbp`,
that is, RBP <- f(RSP, mem) *)
(match Regs.X86_64.of_var (BStd.Def.lhs def),
is_pop_expr @@ BStd.Def.rhs def with
| Some reg, true when reg = Regs.X86_64.rbp ->
Some RbpUndef, local_state
| _ -> None, local_state
)
)
in
(match out_cfa, out_rbp with
| None, None -> None
| Some cfa, None -> Some (cfa, cur_rbp)
| None, Some rbp -> Some (cur_cfa, rbp)
| Some cfa, Some rbp -> Some (cfa, rbp)),
new_state
let process_jmp jmp (cur_reg: reg_pos)
: (reg_pos option) =
let cur_cfa, cur_rbp = cur_reg in
let gen_change = match cur_cfa with
| RspOffset cur_offset -> (fun off ->
let new_offset = Int64.add cur_offset (Int64.of_int off) in
Some (RspOffset(new_offset), cur_rbp)
)
| _ -> (fun _ -> None)
in
match (BStd.Jmp.kind jmp) with
| BStd.Call call -> gen_change (-8)
| BStd.Ret ret -> gen_change (8)
| _ -> None
let process_blk
next_instr_graph (block_init: reg_pos) blk : (reg_changes_fde * reg_pos) =
(** Extracts the registers (CFA+RBP) changes of a block. *)
let apply_offset cur_addr_opt ((accu:reg_changes_fde), cur_reg, local_state)
= function
| None -> (accu, cur_reg, local_state)
| Some reg_pos ->
let cur_addr = (match cur_addr_opt with
| None -> assert false
| Some x -> to_int64_addr x) in
(AddrSet.fold (fun n_addr cur_accu ->
AddrMap.add n_addr reg_pos cur_accu)
(AddrMap.find cur_addr next_instr_graph)
accu),
reg_pos,
local_state
in
let fold_elt (accu, cur_reg, cur_local_state) elt = match elt with
| `Def(def) ->
let new_offset, new_state = process_def cur_local_state def cur_reg in
apply_offset
(opt_addr_of def) (accu, cur_reg, new_state) new_offset
| `Jmp(jmp) ->
apply_offset
(opt_addr_of jmp) (accu, cur_reg, cur_local_state)
@@ process_jmp jmp cur_reg
| _ -> (accu, cur_reg, cur_local_state)
in
let init_changes = (match opt_addr_of blk with
| None -> AddrMap.empty
| Some x ->
let blk_address = to_int64_addr x in
AddrMap.singleton blk_address block_init
) in
let empty_local_state = {
rbp_vars = BStd.Var.Set.empty
} in
let elts_seq = BStd.Blk.elts blk in
let out_reg, end_reg, _ = BStd.Seq.fold elts_seq
~init:(init_changes, block_init, empty_local_state)
~f:fold_elt in
out_reg, end_reg
exception Inconsistent of BStd.tid
let get_entry_blk graph =
let entry = BStd.Seq.min_elt (CFG.nodes graph) ~cmp:(fun x y ->
let ax = opt_addr_of @@ CFG.Node.label x
and ay = opt_addr_of @@ CFG.Node.label y in
match ax, ay with
| None, None -> compare x y
| Some _, None -> -1
| None, Some _ -> 1
| Some ax, Some ay -> compare (to_int64_addr ax) (to_int64_addr ay))
in
match entry with
| None -> assert false
| Some x -> x
let find_last_addr sub =
(** Finds the maximal instruction address in a subroutine *)
let map_opt fl fr merge l r = match l, r with
| None, None -> None
| Some x, None -> Some (fl x)
| None, Some y -> Some (fr y)
| Some x, Some y -> Some (merge (fl x) (fr y))
in
let max_opt_addr_word = map_opt
(fun x -> x)
(fun y -> to_int64_addr y)
max
in
let max_opt_addr = map_opt
(fun x -> x)
(fun y -> y)
max
in
let max_def cur_max def =
max_opt_addr_word cur_max (opt_addr_of def)
in
let fold_res =
BStd.Seq.fold (BStd.Term.enum BStd.blk_t sub)
~init:None
~f:(fun cur_max blk ->
max_opt_addr
(BStd.Seq.fold (BStd.Term.enum BStd.def_t blk)
~init:cur_max
~f:max_def)
(BStd.Seq.fold (BStd.Term.enum BStd.jmp_t blk)
~init:cur_max
~f:max_def)
)
in
match fold_res with
| None -> Int64.zero
| Some x -> x
let cleanup_fde (fde_changes: reg_changes_fde) : reg_changes_fde =
(** Cleanup the result of `of_sub`.
Merges entries at the same address, propagates track lost *)
let fold_one addr reg_change (accu, last_change, lost_track) =
match reg_change, last_change, lost_track with
| _, _, true -> (accu, None, lost_track)
| (CfaLostTrack, _), _, false ->
(AddrMap.add addr reg_change accu, None, true)
| reg_change, Some prev_change, false when reg_change = prev_change ->
(accu, last_change, false)
| reg_change, _, false ->
(AddrMap.add addr reg_change accu, Some reg_change, false)
in
match AddrMap.fold fold_one fde_changes (AddrMap.empty, None, false) with
| out, _, _ -> out
let process_sub sub : subroutine_cfa_data =
(** Extracts the `cfa_changes_fde` of a subroutine *)
let cfg = BStd.Sub.to_cfg sub in
let next_instr_graph = build_next_instr cfg in
let first_addr = int64_addr_of sub in
let last_addr = find_last_addr sub in
let initial_cfa_rsp_offset = Int64.of_int 8 in
let rec dfs_process
(sub_changes: (reg_changes_fde * reg_pos) TIdMap.t)
node
(entry_offset: reg_pos) =
(** Processes one block *)
let cur_blk = CFG.Node.label node in
let tid = BStd.Term.tid @@ cur_blk in
match (TIdMap.find_opt tid sub_changes) with
| None ->
(* Not yet visited: compute the changes *)
let cur_blk_changes, end_reg =
process_blk next_instr_graph entry_offset cur_blk in
let n_sub_changes =
TIdMap.add tid (cur_blk_changes, entry_offset) sub_changes in
BStd.Seq.fold (CFG.Node.succs node cfg)
~f:(fun accu child -> dfs_process accu child end_reg)
~init:n_sub_changes
| Some (_, former_entry_offset) ->
(* Already visited: check that entry values are matching *)
if entry_offset <> former_entry_offset then
raise (Inconsistent tid)
else
sub_changes
in
let entry_blk = get_entry_blk cfg in
let initial_offset = (RspOffset initial_cfa_rsp_offset, RbpUndef) in
let changes_map = dfs_process TIdMap.empty entry_blk initial_offset in
let merged_changes = TIdMap.fold
(fun _ (cfa_changes, _) accu -> AddrMap.union (fun _ v1 v2 ->
if v1 = v2 then
Some v1
else
assert false)
cfa_changes accu)
changes_map
AddrMap.empty in
let reg_changes = cleanup_fde merged_changes in
let output = {
reg_changes_fde = reg_changes ;
beg_pos = first_addr ;
end_pos = last_addr ;
} in
output
let of_prog prog : subroutine_cfa_map =
(** Extracts the `cfa_changes` of a program *)
let fold_step accu sub =
(try
let subroutine_data = process_sub sub in
StrMap.add (BStd.Sub.name sub) subroutine_data accu
with
| InvalidSub -> accu
| Inconsistent tid ->
Format.eprintf "Inconsistent TId %a in subroutine %s, skipping.@."
BStd.Tid.pp tid (BStd.Sub.name sub);
accu
)
in
let subroutines = BStd.Term.enum BStd.sub_t prog in
BStd.Seq.fold subroutines
~init:StrMap.empty
~f:fold_step
let of_proj proj : subroutine_cfa_map =
(** Extracts the `cfa_changes` of a project *)
let prog = BStd.Project.program proj in
of_prog prog
let clean_lost_track_subs pre_dwarf : subroutine_cfa_map =
(** Removes the subroutines on which we lost track from [pre_dwarf] *)
let sub_lost_track sub_name (sub: subroutine_cfa_data) =
not @@ AddrMap.exists (fun addr pos ->
let cfa_pos, _ = pos in
(match cfa_pos with
| RspOffset _ | RbpOffset _ -> false
| CfaLostTrack -> true))
sub.reg_changes_fde
in
StrMap.filter sub_lost_track pre_dwarf