dwarf-synthesis/DwarfSynth/Simplest.ml

444 lines
14 KiB
OCaml

open Std
module CFG = BStd.Graphs.Ir
type memory_offset = int64
type memory_address = int64
module AddrMap = Map.Make(Int64)
module AddrSet = Set.Make(Int64)
type cfa_pos =
RspOffset of memory_offset
| RbpOffset of memory_offset
| CfaLostTrack
type cfa_change = CfaChange of memory_address * cfa_pos
type cfa_changes_fde = cfa_change list
module StrMap = Map.Make(String)
type cfa_changes = cfa_changes_fde StrMap.t
module TIdMap = Map.Make(BStd.Tid)
exception InvalidSub
let pp_cfa_pos ppx = function
| RspOffset off -> Format.fprintf ppx "RSP + (%s)@." (Int64.to_string off)
| RbpOffset off -> Format.fprintf ppx "RBP + (%s)@." (Int64.to_string off)
| CfaLostTrack -> Format.fprintf ppx "??@."
let pp_int64_hex ppx number =
let mask_short = Int64.(pred (shift_left one 16)) in
let pp_short number =
Format.fprintf ppx "%04x" Int64.(to_int (logand number mask_short))
in
List.iter pp_short @@ List.map (fun x ->
Int64.(shift_right number (16*x))) [3;2;1;0]
let pp_cfa_change ppx = function CfaChange(addr, cfa_pos) ->
Format.fprintf ppx "%a: %a" pp_int64_hex addr pp_cfa_pos cfa_pos
let pp_cfa_changes_fde ppx = List.iter (pp_cfa_change ppx)
let pp_cfa_changes ppx =
StrMap.iter (fun fde_name entry ->
Format.fprintf ppx "%s@\n====@\n@\n%a@." fde_name
pp_cfa_changes_fde entry)
let pp_option_of sub_pp ppx = function
| None -> Format.fprintf ppx "None"
| Some x -> Format.fprintf ppx "Some %a" sub_pp x
let opt_addr_of term =
(** Get the address of a term as an option, if it has one*)
BStd.Term.get_attr term BStd.address
let addr_of term =
(** Get the address of a term *)
match opt_addr_of term with
| None -> assert false
| Some addr -> addr
let opt_addr_of_blk_elt = function
| `Def def -> opt_addr_of def
| `Jmp jmp -> opt_addr_of jmp
| `Phi phi -> opt_addr_of phi
let entrypoint_address blk =
(** Find the first instruction address in the current block.
Return None if no instruction has address. *)
let fold_one accu cur_elt = match accu, opt_addr_of_blk_elt cur_elt with
| None, None -> None
| None, Some x -> Some x
| _, _ -> accu
in
BStd.Seq.fold (BStd.Blk.elts blk)
~init:None
~f:fold_one
let to_int64_addr addr =
BStd.Word.to_int64_exn addr
let int64_addr_of x = to_int64_addr @@ addr_of x
let map_option f = function
| None -> None
| Some x -> Some (f x)
let build_next_instr graph =
(** Build a map of memory_address -> AddrSet.t holding, for each address, the
set of instructions coming right after the instruction at given address.
There might be multiple such addresses, if the current instruction is at
a point of branching. *)
let addresses_in_block blk =
(** Set of addresses present in the block *)
BStd.Seq.fold (BStd.Blk.elts blk)
~init:AddrSet.empty
~f:(fun accu elt ->
let addr = opt_addr_of_blk_elt elt in
match addr with
| None -> accu
| Some x ->
(try
AddrSet.add (BStd.Word.to_int64_exn x) accu
with _ -> accu)
)
in
let node_successors_addr (nd: CFG.node) : AddrSet.t =
let rec do_find_succ accu nd =
let fold_one accu c_node =
match entrypoint_address (CFG.Node.label c_node) with
| Some addr ->
(try
AddrSet.add (BStd.Word.to_int64_exn addr) accu
with _ -> accu)
| None -> do_find_succ accu c_node
in
let succ = CFG.Node.succs nd graph in
BStd.Seq.fold succ
~init:accu
~f:fold_one
in
do_find_succ AddrSet.empty nd
in
let build_of_block accu_map node =
let blk = CFG.Node.label node in
let node_successors = node_successors_addr node in
let instr_addresses = AddrSet.elements @@ addresses_in_block blk in
let rec accumulate_mappings mappings addr_list = function
| None -> mappings
| Some (instr, instr_seq) as cur_instr ->
let instr_addr = opt_addr_of_blk_elt instr in
match (map_option to_int64_addr instr_addr), addr_list with
| None, _ ->
accumulate_mappings mappings addr_list @@ BStd.Seq.next instr_seq
| Some cur_addr, next_addr::t when cur_addr >= next_addr ->
accumulate_mappings mappings t cur_instr
| Some cur_addr, next_addr::_ ->
let n_mappings = AddrMap.add
cur_addr (AddrSet.singleton next_addr) mappings in
accumulate_mappings n_mappings addr_list @@ BStd.Seq.next instr_seq
| Some cur_addr, [] ->
let n_mappings = AddrMap.add
cur_addr node_successors mappings in
accumulate_mappings n_mappings addr_list @@ BStd.Seq.next instr_seq
in
accumulate_mappings
accu_map
instr_addresses
(BStd.Seq.next @@ BStd.Blk.elts blk)
in
BStd.Seq.fold (CFG.nodes graph)
~init:AddrMap.empty
~f:build_of_block
let interpret_var_expr c_var offset expr = BStd.Bil.(
let closed_form = BStd.Exp.substitute
(var c_var)
(int (BStd.Word.of_int64 (Int64.neg offset)))
expr
in
let res = BStd.Exp.eval closed_form in
match res with
| Imm value ->
Some (Int64.neg @@ BStd.Word.to_int64_exn @@ BStd.Word.signed value)
| _ -> None
)
let process_def def (cur_offset: memory_offset)
: ((cfa_pos * memory_offset) option) =
let lose_track = Some (CfaLostTrack, Int64.zero) in
(match Regs.X86_64.of_var (BStd.Def.lhs def) with
| Some reg when reg = Regs.X86_64.rsp ->
let exp = BStd.Def.rhs def in
let free_vars = BStd.Exp.free_vars exp in
let free_x86_regs = Regs.X86_64.map_varset free_vars in
(match Regs.DwRegOptSet.cardinal free_x86_regs with
| 1 ->
let free_var = Regs.DwRegOptSet.choose free_x86_regs in
(match free_var with
| Some dw_var when dw_var = Regs.X86_64.rsp ->
let bil_var = (match BStd.Var.Set.choose free_vars with
| None -> assert false
| Some x -> x) in
let interpreted = interpret_var_expr bil_var cur_offset exp in
(match interpreted with
| None -> lose_track
| Some new_offset ->
Some (RspOffset(new_offset), new_offset)
)
| _ -> lose_track
)
| _ -> lose_track
)
| _ -> None)
let process_jmp jmp (cur_offset: memory_offset)
: ((cfa_pos * memory_offset) option) =
let gen_change off =
let new_offset = Int64.add cur_offset (Int64.of_int off) in
Some (RspOffset(new_offset), new_offset)
in
match (BStd.Jmp.kind jmp) with
| BStd.Call call -> gen_change (-8)
| BStd.Ret ret -> gen_change (8)
| _ -> None
let sym_of_blk next_instr_graph blk : cfa_changes_fde =
(** Extracts the symbolic CFA changes of a block. These changes assume that
at the beginning of the block, CFA = RspOffset(0) and will be offset
after *)
let apply_offset cur_addr_opt ((accu:cfa_change list), cur_offset) = function
| None -> (accu, cur_offset)
| Some (pos, n_offset) ->
let cur_addr = (match cur_addr_opt with
| None -> assert false
| Some x -> to_int64_addr x) in
(AddrSet.fold (fun n_addr cur_accu ->
let change = CfaChange(n_addr, pos) in
(change :: cur_accu))
(AddrMap.find cur_addr next_instr_graph)
accu),
n_offset
in
let fold_elt (accu, cur_offset) elt = match elt with
| `Def(def) ->
apply_offset
(opt_addr_of def) (accu, cur_offset) @@ process_def def cur_offset
| `Jmp(jmp) ->
apply_offset
(opt_addr_of jmp) (accu, cur_offset) @@ process_jmp jmp cur_offset
| _ -> (accu, cur_offset)
in
let elts_seq = BStd.Blk.elts blk in
let out, end_offset = BStd.Seq.fold elts_seq
~init:([], Int64.zero)
~f:fold_elt in
out
let end_offset (changelist: cfa_changes_fde): memory_offset option =
match changelist with
| CfaChange(_, RspOffset(x)) :: _ -> Some x
| _ -> None
exception Inconsistent of BStd.tid
let rec dfs_propagate changemap propagated parent_val node graph =
let c_tid = BStd.Term.tid @@ CFG.Node.label node in
match TIdMap.find_opt c_tid propagated with
| Some x ->
if x = parent_val then
(* Already propagated and consistent, all fine *)
propagated
else
(* Already propagated with a different value: inconsistency *)
raise (Inconsistent c_tid)
| None ->
let n_propagated = TIdMap.add c_tid parent_val propagated in
let outwards = CFG.Node.outputs node graph in
let self_entry = TIdMap.find c_tid changemap in
let offset = (match end_offset self_entry with
Some x -> x
| None -> Int64.zero) in
let cur_val = Int64.add parent_val offset in
BStd.Seq.fold outwards
~init:n_propagated
~f:(fun accu edge ->
dfs_propagate changemap accu cur_val (CFG.Edge.dst edge) graph)
let get_entry_blk graph =
let entry = BStd.Seq.min_elt (CFG.nodes graph) ~cmp:(fun x y ->
let ax = opt_addr_of @@ CFG.Node.label x
and ay = opt_addr_of @@ CFG.Node.label y in
match ax, ay with
| None, None -> compare x y
| Some _, None -> -1
| None, Some _ -> 1
| Some ax, Some ay -> compare (to_int64_addr ax) (to_int64_addr ay))
in
match entry with
| None -> assert false
| Some x -> x
let same_keys map1 map2 =
let exists_in_other other key _ =
TIdMap.mem key other in
TIdMap.for_all (exists_in_other map2) map1
&& TIdMap.for_all (exists_in_other map1) map2
let of_sub sub : cfa_changes_fde =
(** Extracts the `cfa_changes_fde` of a subroutine *)
Format.eprintf "Sub %s...@." @@ BStd.Sub.name sub ;
let cfg = BStd.Sub.to_cfg sub in
let next_instr_graph = build_next_instr cfg in
let initial_cfa_rsp_offset = Int64.of_int 8 in
let store_sym accu blk =
let blk = CFG.Node.label blk in
let res = sym_of_blk next_instr_graph blk in
TIdMap.add (BStd.Term.tid blk) res accu
in
let node_addr nd = opt_addr_of @@ CFG.Node.label nd in
let merge_corrected blk_tid changes offset = match (changes, offset) with
| Some changes, Some offset ->
Some (
List.map (fun (CfaChange(addr, pos)) -> match pos with
RspOffset(off) -> CfaChange(addr,
RspOffset(Int64.add off offset))
| RbpOffset(off) -> CfaChange(addr, RbpOffset(off))
| CfaLostTrack -> CfaChange(addr, CfaLostTrack)
)
changes
)
| _ -> None
in
let tid_match = BStd.Seq.fold (CFG.nodes cfg)
~init:TIdMap.empty
~f:(fun accu node ->
let tid = BStd.Term.tid @@ CFG.Node.label node in
TIdMap.add tid node accu)
in
let blk_sym = BStd.Seq.fold
~init:TIdMap.empty
~f:store_sym
@@ CFG.nodes cfg
in
let entry_blk = get_entry_blk cfg in
let offset_map = dfs_propagate
blk_sym (TIdMap.empty) initial_cfa_rsp_offset entry_blk cfg in
let corrected = TIdMap.merge merge_corrected blk_sym offset_map in
let is_connex = same_keys tid_match corrected in
if not is_connex then
raise InvalidSub ;
let tid_list = TIdMap.bindings tid_match in
let sorted_blk = List.sort (fun (tid1, bl1) (tid2, bl2) ->
let res = match (node_addr bl1, node_addr bl2) with
| Some addr1, Some addr2 -> compare addr1 addr2
| Some _, None -> 1
| None, Some _ -> -1
| None, None -> compare tid1 tid2
in
-res)
tid_list
in
let out = List.fold_left
(fun accu blk ->
let changes = TIdMap.find blk corrected in
List.fold_left (fun accu chg -> chg::accu)
accu
changes
)
[]
(List.map (fun (x, y) -> x) sorted_blk) in
let init = [
CfaChange(int64_addr_of sub, RspOffset(initial_cfa_rsp_offset))
] in
init @ out
let cleanup_fde (fde_changes: cfa_changes_fde) : cfa_changes_fde =
(** Cleanup the result of `of_sub`.
Merges entries at the same address, propagates track lost *)
let fold_one (accu, (last_commit:cfa_pos option), in_flight, lost_track) = function
| CfaChange(addr, cfa_change) as cur_change -> (
match lost_track, in_flight, cfa_change with
| true, _, _ ->
(* Already lost track: give up *)
(accu, last_commit, None, lost_track)
| false, _, CfaLostTrack ->
(* Just lost track: give up the operation on flight as well *)
(cur_change :: accu, None, None, true)
| _, Some CfaChange(flight_addr, flight_chg), _
when flight_addr = addr ->
(* On flight address matches current address: continue flying *)
accu, last_commit, Some cur_change, false
| _, Some CfaChange(_, in_flight_inner_pos), _
when last_commit = Some in_flight_inner_pos ->
(* Doesn't match anymore, but there was some operation in flight,
which has the same result as what was last committed. Discard. *)
(accu, last_commit, Some cur_change, false)
| _, Some (CfaChange(_, in_flight_inner_pos) as in_flight_inner), _ ->
(* Doesn't match anymore, but there was some operation in flight:
commit it, put the new one in flight *)
(in_flight_inner :: accu, Some in_flight_inner_pos,
Some cur_change, false)
| _, None, _ ->
(* No operation in flight: put the new one in flight *)
(accu, last_commit, Some cur_change, false)
)
in
let extract_end_value (accu, _, in_flight, lost_track) =
List.rev @@ match lost_track, in_flight with
| true, _ -> accu
| false, None -> accu
| false, Some x -> x :: accu
in
extract_end_value
@@ List.fold_left fold_one ([], None, None, false) fde_changes
let of_prog prog : cfa_changes =
(** Extracts the `cfa_changes` of a program *)
let fold_step accu sub =
(try
let res = cleanup_fde @@ of_sub sub in
StrMap.add (BStd.Sub.name sub) res accu
with InvalidSub -> accu)
in
let subroutines = BStd.Term.enum BStd.sub_t prog in
BStd.Seq.fold subroutines
~init:StrMap.empty
~f:fold_step
let of_proj proj : cfa_changes =
(** Extracts the `cfa_changes` of a project *)
let prog = BStd.Project.program proj in
of_prog prog