Generate CFA tracking with RIP

This commit is contained in:
Théophile Bastian 2018-10-23 19:21:57 +02:00
parent 194f3f8128
commit 4096c4bf19
12 changed files with 616 additions and 0 deletions

7
.merlin Normal file
View File

@ -0,0 +1,7 @@
S .
S DwarfSynth
B _build
B _build/DwarfSynth
PKG bap
PKG core_kernel
PKG bap_primus

6
DwarfSynth.mlpack Normal file
View File

@ -0,0 +1,6 @@
Main
Std
PreDwarf
Regs
OnlyUnwind
Simplest

5
DwarfSynth/Main.ml Normal file
View File

@ -0,0 +1,5 @@
open Std
let main outfile proj =
let pre_dwarf = Simplest.of_proj proj in
Format.printf "%a" Simplest.pp_cfa_changes pre_dwarf

123
DwarfSynth/OnlyUnwind.ml Normal file
View File

@ -0,0 +1,123 @@
(** Basic Pre-DWARF generation, only for unwinding data. Tracks only CFA,
and RA. *)
open Std
module CFG = BStd.Graphs.Ir
module BlkMap = Map.Make(BStd.Blk)
(** Add a new row (ie. register change) to a PreDwarf.reg_data table *)
let add_fde_row reg row (data: PreDwarf.reg_data) =
let prev_data = match Regs.RegMap.find_opt reg data with
| None -> []
| Some l -> l
in
let new_data = row :: prev_data in
Regs.RegMap.add reg new_data data
let fold_sub init fct_phi fct_def fct_jmp term =
(** Fold over the addresses of encountered subterms in `term`, a `sub` *)
let fold_term cls init fct cterm =
BStd.Seq.fold ~init:init ~f:fct @@ BStd.Term.enum cls cterm
in
fold_term BStd.blk_t init (fun accu -> fun blk ->
let accu_phi = fold_term BStd.phi_t accu fct_phi blk in
let accu_def = fold_term BStd.def_t accu_phi fct_def blk in
let accu_jmp = fold_term BStd.jmp_t accu_def fct_jmp blk in
accu_jmp) term
exception No_address
let get_term_addr term = match (BStd.Term.get_attr term BStd.address) with
(** Get the address of a given term *)
| None -> raise No_address
| Some addr -> addr
let extract_addr extractor term =
let examine_addr paddr cterm =
let naddr = get_term_addr cterm in extractor paddr naddr
in
let start_addr = get_term_addr term in
fold_sub start_addr examine_addr examine_addr examine_addr term
let low_addr term =
(** Find the low addr of some `BStd.term` *)
extract_addr min term
let high_addr term =
(** Find the high addr of some `BStd.term` (inclusive) *)
extract_addr max term
let addr_to_int addr =
(** Transforms an address (BStd.addr) into an int *)
try CKStd.Or_error.ok_exn @@ BStd.Word.to_int addr
with _ -> raise No_address
let fde_of_subroutine (sub: BStd.sub BStd.term): PreDwarf.fde =
PreDwarf.({
start_pc = addr_to_int @@ low_addr sub;
end_pc = addr_to_int @@ high_addr sub;
name = BStd.Term.name sub
})
let symbolic_predwarf_of_blk (blk: BStd.blk BStd.term) =
(** Analyze a block of code, and return its predwarf based on RSP, assuming
that at the beginning of the block, CFA = %rsp.
This will be then adjusted to be offseted by the actual CFA value upon
entrance in this block.
This function returns its offset by the end of its block.
*)
let fold_def accu elt =
let var = BStd.Def.lhs elt in
match Regs.X86_64.get_register var with
| None -> accu
| Some reg ->
accu (* TODO *)
in
let fold_elt accu elt = match elt with
| `Def(def) -> fold_def accu def
| _ -> accu
in
BStd.Seq.fold (BStd.Blk.elts blk)
~init:Regs.RegMap.empty
~f:fold_elt
let predwarf_of_sub
(fde:PreDwarf.fde)
(sub: BStd.sub BStd.term)
: PreDwarf.reg_data =
(** Compute the pre-dwarf data for a single subroutine/FDE *)
(* A `call` always result in %rsp pointing to the RA *)
let init_cfa =
[(PreDwarf.(fde.start_pc), Regs.(RegOffset(X86_64.rsp, 8)))] in
let init_ra =
[(PreDwarf.(fde.start_pc), Regs.(RegOffset(DwRegCFA, -8)))] in
let init_reg_data = Regs.(RegMap.(
add DwRegRA init_ra
@@ add DwRegCFA init_cfa
@@ empty
))
in
let sub_cfg = BStd.Sub.to_cfg sub in
assert false
let compute_pre_dwarf proj: PreDwarf.pre_dwarf_data =
let prog = BStd.Project.program proj in
let subroutines = BStd.Term.enum BStd.sub_t prog in
let subdwarf = BStd.Seq.fold subroutines
~init:PreDwarf.FdeMap.empty
~f:(fun accu sub ->
let fde = fde_of_subroutine sub in
let predwarf = predwarf_of_sub fde sub in
PreDwarf.FdeMap.add fde predwarf accu) in
subdwarf

25
DwarfSynth/PreDwarf.ml Normal file
View File

@ -0,0 +1,25 @@
(** A program counter value *)
type pc = int
(** A notice that the register save location has changed to this new value,
starting from the given pc *)
type reg_change = pc * Regs.reg_loc
(** A structure holding, for each register, a list of changes. Used to track
the evolution of the storage location of every register. *)
type reg_data = reg_change list Regs.RegMap.t
(** Represents a FDE (that is, morally, a function). *)
type fde = {
start_pc: pc;
end_pc: pc;
name: string;
}
(** A map of FDEs *)
module FdeMap = Map.Make(struct
type t = fde
let compare = compare
end)
type pre_dwarf_data = reg_data FdeMap.t

74
DwarfSynth/Regs.ml Normal file
View File

@ -0,0 +1,74 @@
(** Defines everything related to registers. Architecture-independant. *)
open Std
module StrMap = Map.Make(String)
(** A list of possible registers *)
type dwarf_reg =
DwReg of int (** A register identified by some ID *)
| DwRegCFA (** The Canonical Frame Address virtual register *)
| DwRegRA (** The Return Address virtual register *)
(** A set of options of dwarf_reg. Useful to map BAP vars onto. *)
module DwRegOptSet = Set.Make(struct
type t = dwarf_reg option
let compare = compare
end)
type mem_offset = int
module RegMap = Map.Make(struct
type t=dwarf_reg
let compare r1 r2 = match (r1, r2) with
| DwReg(rid1), DwReg(rid2) -> compare rid1 rid2
| r1, r2 -> Pervasives.compare r1 r2
(* r1 and r2 are not both DwReg, so we can safely compare
them using the Pervasives compare *)
end)
type reg_loc = RegOffset of dwarf_reg * mem_offset
let is_register var =
let physical = BStd.Var.is_physical var in
let typ = BStd.Var.typ var in
let register = BStd.Type.(match typ with Imm(_) -> true | _ -> false) in
physical && register
module X86_64 = struct
let rax = DwReg(0)
let rdx = DwReg(1)
let rcx = DwReg(2)
let rbx = DwReg(3)
let rsi = DwReg(4)
let rdi = DwReg(5)
let rbp = DwReg(6)
let rsp = DwReg(7)
let r8 = DwReg(8)
let r9 = DwReg(9)
let r10 = DwReg(10)
let r11 = DwReg(11)
let r12 = DwReg(12)
let r13 = DwReg(13)
let r14 = DwReg(14)
let r15 = DwReg(15)
let rip = DwReg(16)
let name_map = StrMap.add "RSP" rsp StrMap.empty (* TODO *)
let get_register reg = match is_register reg with
| false -> None
| true ->
(try Some (StrMap.find (BStd.Var.name reg) name_map)
with Not_found -> None)
let of_var var =
match is_register var with
| false -> None
| true -> StrMap.find_opt (BStd.Var.name var) name_map
let map_varset varset =
BStd.Var.Set.fold varset
~init:DwRegOptSet.empty
~f:(fun accu elt -> DwRegOptSet.add (of_var elt) accu)
end

320
DwarfSynth/Simplest.ml Normal file
View File

@ -0,0 +1,320 @@
open Std
module CFG = BStd.Graphs.Ir
type memory_offset = int64
type cfa_pos =
RspOffset of memory_offset
| RbpOffset of memory_offset
| CfaLostTrack
type cfa_change = CfaChange of BStd.word * cfa_pos
type cfa_changes_fde = cfa_change list
module StrMap = Map.Make(String)
type cfa_changes = cfa_changes_fde StrMap.t
module TIdMap = Map.Make(BStd.Tid)
exception InvalidSub
let pp_cfa_pos ppx = function
| RspOffset off -> Format.fprintf ppx "RSP + (%s)@." (Int64.to_string off)
| RbpOffset off -> Format.fprintf ppx "RBP + (%s)@." (Int64.to_string off)
| CfaLostTrack -> Format.fprintf ppx "??@."
let pp_cfa_change ppx = function CfaChange(addr, cfa_pos) ->
Format.fprintf ppx "%a: %a" BStd.Word.pp_hex addr pp_cfa_pos cfa_pos
let pp_cfa_changes_fde ppx = List.iter (pp_cfa_change ppx)
let pp_cfa_changes ppx =
StrMap.iter (fun fde_name entry ->
Format.fprintf ppx "%s@\n====@\n@\n%a@." fde_name
pp_cfa_changes_fde entry)
let pp_option_of sub_pp ppx = function
| None -> Format.fprintf ppx "None"
| Some x -> Format.fprintf ppx "Some %a" sub_pp x
let addr_of term =
(** Get the address of a term *)
match BStd.Term.get_attr term BStd.address with
| None -> assert false
| Some addr -> addr
let interpret_var_expr c_var offset expr = BStd.Bil.(
let closed_form = BStd.Exp.substitute
(var c_var)
(int (BStd.Word.of_int64 offset))
expr
in
let res = BStd.Exp.eval closed_form in
match res with
| Imm value ->
Some (BStd.Word.to_int64_exn @@ BStd.Word.signed value)
| _ -> None
)
let process_def def (cur_offset: memory_offset)
: ((cfa_change * memory_offset) option) =
let lose_track addr =
Some (CfaChange(addr, CfaLostTrack), Int64.zero)
in
(match Regs.X86_64.of_var (BStd.Def.lhs def) with
| Some reg when reg = Regs.X86_64.rsp ->
let exp = BStd.Def.rhs def in
let free_vars = BStd.Exp.free_vars exp in
let free_x86_regs = Regs.X86_64.map_varset free_vars in
(match Regs.DwRegOptSet.cardinal free_x86_regs with
| 1 ->
let free_var = Regs.DwRegOptSet.choose free_x86_regs in
(match free_var with
| Some dw_var when dw_var = Regs.X86_64.rsp ->
let bil_var = (match BStd.Var.Set.choose free_vars with
| None -> assert false
| Some x -> x) in
let interpreted = interpret_var_expr bil_var cur_offset exp in
(match interpreted with
| None -> lose_track (addr_of def)
| Some interp_val ->
let gap = Int64.sub interp_val cur_offset in
let new_offset = Int64.sub cur_offset gap in
Some (CfaChange(addr_of def, RspOffset(new_offset)), new_offset)
)
| _ -> lose_track (addr_of def)
)
| _ -> lose_track @@ addr_of def
)
| _ -> None)
let process_jmp jmp (cur_offset: memory_offset)
: ((cfa_change * memory_offset) option) =
let gen_change off =
let new_offset = Int64.add cur_offset (Int64.of_int off) in
Some (CfaChange(addr_of jmp, RspOffset(new_offset)), new_offset)
in
match (BStd.Jmp.kind jmp) with
| BStd.Call call -> gen_change (-8)
| BStd.Ret ret -> gen_change (8)
| _ -> None
let sym_of_blk blk : cfa_changes_fde =
(** Extracts the symbolic CFA changes of a block. These changes assume that
at the beginning of the block, CFA = RspOffset(0) and will be offset
after *)
let apply_offset (accu, cur_offset) = function
| None -> (accu, cur_offset)
| Some (change, n_offset) -> (change :: accu, n_offset)
in
let fold_elt (accu, cur_offset) elt = match elt with
| `Def(def) ->
apply_offset (accu, cur_offset) @@ process_def def cur_offset
| `Jmp(jmp) ->
apply_offset (accu, cur_offset) @@ process_jmp jmp cur_offset
| _ -> (accu, cur_offset)
in
let elts_seq = BStd.Blk.elts blk in
let out, end_offset = BStd.Seq.fold elts_seq
~init:([], Int64.zero)
~f:fold_elt in
out
let end_offset (changelist: cfa_changes_fde): memory_offset option =
match changelist with
| CfaChange(_, RspOffset(x)) :: _ -> Some x
| _ -> None
exception Inconsistent of BStd.tid
let rec dfs_propagate changemap propagated parent_val node graph =
let c_tid = BStd.Term.tid @@ CFG.Node.label node in
match TIdMap.find_opt c_tid propagated with
| Some x ->
if x = parent_val then
(* Already propagated and consistent, all fine *)
propagated
else
(* Already propagated with a different value: inconsistency *)
raise (Inconsistent c_tid)
| None ->
let n_propagated = TIdMap.add c_tid parent_val propagated in
let outwards = CFG.Node.outputs node graph in
let self_entry = TIdMap.find c_tid changemap in
let offset = (match end_offset self_entry with
Some x -> x
| None -> Int64.zero) in
let cur_val = Int64.add parent_val offset in
BStd.Seq.fold outwards
~init:n_propagated
~f:(fun accu edge ->
dfs_propagate changemap accu cur_val (CFG.Edge.dst edge) graph)
let get_entry_blk graph =
let entry =
BStd.Seq.find (CFG.nodes graph)
(fun node -> BStd.Seq.is_empty @@ CFG.Node.inputs node graph)
in match entry with
| None -> assert false
| Some x -> x
let term_addr term =
BStd.Term.get_attr term BStd.address
let same_keys map1 map2 =
let exists_in_other other key _ =
TIdMap.mem key other in
TIdMap.for_all (exists_in_other map2) map1
&& TIdMap.for_all (exists_in_other map1) map2
let of_sub sub : cfa_changes_fde =
(** Extracts the `cfa_changes_fde` of a subroutine *)
Format.eprintf "Sub %s...@." @@ BStd.Sub.name sub ;
let initial_cfa_rsp_offset = Int64.of_int 8 in
let store_sym accu blk =
let blk = CFG.Node.label blk in
let res = sym_of_blk blk in
TIdMap.add (BStd.Term.tid blk) res accu
in
let node_addr nd = term_addr @@ CFG.Node.label nd in
let merge_corrected blk_tid changes offset = match (changes, offset) with
| Some changes, Some offset ->
Some (
List.map (fun (CfaChange(addr, pos)) -> match pos with
RspOffset(off) -> CfaChange(addr,
RspOffset(Int64.add off offset))
| RbpOffset(off) -> CfaChange(addr, RbpOffset(off))
| CfaLostTrack -> CfaChange(addr, CfaLostTrack)
)
changes
)
| _ -> None
in
let cfg = BStd.Sub.to_cfg sub in
let tid_match = BStd.Seq.fold (CFG.nodes cfg)
~init:TIdMap.empty
~f:(fun accu node ->
let tid = BStd.Term.tid @@ CFG.Node.label node in
TIdMap.add tid node accu)
in
let blk_sym = BStd.Seq.fold
~init:TIdMap.empty
~f:store_sym
@@ CFG.nodes cfg
in
let entry_blk = get_entry_blk cfg in
let offset_map = dfs_propagate
blk_sym (TIdMap.empty) initial_cfa_rsp_offset entry_blk cfg in
let corrected = TIdMap.merge merge_corrected blk_sym offset_map in
let is_connex = same_keys tid_match corrected in
if not is_connex then
raise InvalidSub ;
let tid_list = TIdMap.bindings tid_match in
let sorted_blk = List.sort (fun (tid1, bl1) (tid2, bl2) ->
let res = match (node_addr bl1, node_addr bl2) with
| Some addr1, Some addr2 -> compare addr1 addr2
| Some _, None -> 1
| None, Some _ -> -1
| None, None -> compare tid1 tid2
in
-res)
tid_list
in
let out = List.fold_left
(fun accu blk ->
let changes = TIdMap.find blk corrected in
List.fold_left (fun accu chg -> chg::accu)
accu
changes
)
[]
(List.map (fun (x, y) -> x) sorted_blk) in
let sub_addr = (match term_addr sub with
| Some x -> x
| None -> assert false) in
let init = [
CfaChange(sub_addr, RspOffset(initial_cfa_rsp_offset))
] in
init @ out
let cleanup_fde (fde_changes: cfa_changes_fde) : cfa_changes_fde =
(** Cleanup the result of `of_sub`.
Merges entries at the same address, propagates track lost *)
let fold_one (accu, (last_commit:cfa_pos option), in_flight, lost_track) = function
| CfaChange(addr, cfa_change) as cur_change -> (
match lost_track, in_flight, cfa_change with
| true, _, _ ->
(* Already lost track: give up *)
(accu, last_commit, None, lost_track)
| false, _, CfaLostTrack ->
(* Just lost track: give up the operation on flight as well *)
(cur_change :: accu, None, None, true)
| _, Some CfaChange(flight_addr, flight_chg), _
when flight_addr = addr ->
(* On flight address matches current address: continue flying *)
accu, last_commit, Some cur_change, false
| _, Some CfaChange(_, in_flight_inner_pos), _
when last_commit = Some in_flight_inner_pos ->
(* Doesn't match anymore, but there was some operation in flight,
which has the same result as what was last committed. Discard. *)
(accu, last_commit, Some cur_change, false)
| _, Some (CfaChange(_, in_flight_inner_pos) as in_flight_inner), _ ->
(* Doesn't match anymore, but there was some operation in flight:
commit it, put the new one in flight *)
(in_flight_inner :: accu, Some in_flight_inner_pos,
Some cur_change, false)
| _, None, _ ->
(* No operation in flight: put the new one in flight *)
(accu, last_commit, Some cur_change, false)
)
in
let extract_end_value (accu, _, in_flight, lost_track) =
List.rev @@ match lost_track, in_flight with
| true, _ -> accu
| false, None -> accu
| false, Some x -> x :: accu
in
extract_end_value
@@ List.fold_left fold_one ([], None, None, false) fde_changes
let of_prog prog : cfa_changes =
(** Extracts the `cfa_changes` of a program *)
let fold_step accu sub =
(try
let res = (*cleanup_fde @@ *)of_sub sub in
StrMap.add (BStd.Sub.name sub) res accu
with InvalidSub -> accu)
in
let subroutines = BStd.Term.enum BStd.sub_t prog in
BStd.Seq.fold subroutines
~init:StrMap.empty
~f:fold_step
let of_proj proj : cfa_changes =
(** Extracts the `cfa_changes` of a project *)
let prog = BStd.Project.program proj in
of_prog prog

3
DwarfSynth/Std.ml Normal file
View File

@ -0,0 +1,3 @@
module BStd = Bap.Std
module CKStd = Core_kernel.Std

18
Makefile Normal file
View File

@ -0,0 +1,18 @@
OCAMLBUILD=bapbuild
BAPBUNDLE=bapbundle
ROOT_MODULE=dwarfsynth
all: install
.PHONY: $(ROOT_MODULE).plugin
$(ROOT_MODULE).plugin:
$(OCAMLBUILD) $(ROOT_MODULE).plugin
.PHONY: install
install: $(ROOT_MODULE).plugin
$(BAPBUNDLE) install $<
.PHONY: clean
clean:
rm -rf _build

8
_tags Normal file
View File

@ -0,0 +1,8 @@
true: bin_annot
"DwarfSynth": include
<DwarfSynth/Main.cmx>: for-pack(DwarfSynth)
<DwarfSynth/Std.cmx>: for-pack(DwarfSynth)
<DwarfSynth/PreDwarf.cmx>: for-pack(DwarfSynth)
<DwarfSynth/Regs.cmx>: for-pack(DwarfSynth)
<DwarfSynth/OnlyUnwind.cmx>: for-pack(DwarfSynth)
<DwarfSynth/Simplest.cmx>: for-pack(DwarfSynth)

26
dwarfsynth.ml Normal file
View File

@ -0,0 +1,26 @@
(** dwarfsynth
*
* Entry point for the BAP plugin `dwarfsynth`, defining the command line
* interface
**)
module Self = struct
include Bap.Std.Self()
end
let main = DwarfSynth.Main.main
module Cmdline = struct
module Cnf = Self.Config
let outfile = Cnf.(
param (some string) "output"
~doc:("The file in which the output ELF will be written. Output goes "
^ "to stdout by default.")
)
let () = Cnf.(
when_ready ((fun {get=(!!)} ->
Bap.Std.Project.register_pass' (main !!outfile)))
)
end

1
dwarfsynth.plugin Symbolic link
View File

@ -0,0 +1 @@
/home/tobast/cours/internship/src/dwarf-synthesis/_build/dwarfsynth.plugin