diff --git a/.merlin b/.merlin new file mode 100644 index 0000000..9d80618 --- /dev/null +++ b/.merlin @@ -0,0 +1,7 @@ +S . +S DwarfSynth +B _build +B _build/DwarfSynth +PKG bap +PKG core_kernel +PKG bap_primus diff --git a/DwarfSynth.mlpack b/DwarfSynth.mlpack new file mode 100644 index 0000000..b4deb07 --- /dev/null +++ b/DwarfSynth.mlpack @@ -0,0 +1,6 @@ +Main +Std +PreDwarf +Regs +OnlyUnwind +Simplest diff --git a/DwarfSynth/Main.ml b/DwarfSynth/Main.ml new file mode 100644 index 0000000..08f3836 --- /dev/null +++ b/DwarfSynth/Main.ml @@ -0,0 +1,5 @@ +open Std + +let main outfile proj = + let pre_dwarf = Simplest.of_proj proj in + Format.printf "%a" Simplest.pp_cfa_changes pre_dwarf diff --git a/DwarfSynth/OnlyUnwind.ml b/DwarfSynth/OnlyUnwind.ml new file mode 100644 index 0000000..e43db45 --- /dev/null +++ b/DwarfSynth/OnlyUnwind.ml @@ -0,0 +1,123 @@ +(** Basic Pre-DWARF generation, only for unwinding data. Tracks only CFA, + and RA. *) + +open Std +module CFG = BStd.Graphs.Ir + +module BlkMap = Map.Make(BStd.Blk) + +(** Add a new row (ie. register change) to a PreDwarf.reg_data table *) +let add_fde_row reg row (data: PreDwarf.reg_data) = + let prev_data = match Regs.RegMap.find_opt reg data with + | None -> [] + | Some l -> l + in + let new_data = row :: prev_data in + Regs.RegMap.add reg new_data data + +let fold_sub init fct_phi fct_def fct_jmp term = + (** Fold over the addresses of encountered subterms in `term`, a `sub` *) + let fold_term cls init fct cterm = + BStd.Seq.fold ~init:init ~f:fct @@ BStd.Term.enum cls cterm + in + + fold_term BStd.blk_t init (fun accu -> fun blk -> + let accu_phi = fold_term BStd.phi_t accu fct_phi blk in + let accu_def = fold_term BStd.def_t accu_phi fct_def blk in + let accu_jmp = fold_term BStd.jmp_t accu_def fct_jmp blk in + accu_jmp) term + +exception No_address + +let get_term_addr term = match (BStd.Term.get_attr term BStd.address) with + (** Get the address of a given term *) + | None -> raise No_address + | Some addr -> addr + +let extract_addr extractor term = + let examine_addr paddr cterm = + let naddr = get_term_addr cterm in extractor paddr naddr + in + let start_addr = get_term_addr term in + fold_sub start_addr examine_addr examine_addr examine_addr term + +let low_addr term = + (** Find the low addr of some `BStd.term` *) + extract_addr min term + +let high_addr term = + (** Find the high addr of some `BStd.term` (inclusive) *) + extract_addr max term + +let addr_to_int addr = + (** Transforms an address (BStd.addr) into an int *) + try CKStd.Or_error.ok_exn @@ BStd.Word.to_int addr + with _ -> raise No_address + +let fde_of_subroutine (sub: BStd.sub BStd.term): PreDwarf.fde = + PreDwarf.({ + start_pc = addr_to_int @@ low_addr sub; + end_pc = addr_to_int @@ high_addr sub; + name = BStd.Term.name sub + }) + +let symbolic_predwarf_of_blk (blk: BStd.blk BStd.term) = + (** Analyze a block of code, and return its predwarf based on RSP, assuming + that at the beginning of the block, CFA = %rsp. + + This will be then adjusted to be offseted by the actual CFA value upon + entrance in this block. + + This function returns its offset by the end of its block. + *) + + let fold_def accu elt = + let var = BStd.Def.lhs elt in + + match Regs.X86_64.get_register var with + | None -> accu + | Some reg -> + accu (* TODO *) + in + + let fold_elt accu elt = match elt with + | `Def(def) -> fold_def accu def + | _ -> accu + in + + BStd.Seq.fold (BStd.Blk.elts blk) + ~init:Regs.RegMap.empty + ~f:fold_elt + +let predwarf_of_sub + (fde:PreDwarf.fde) + (sub: BStd.sub BStd.term) + : PreDwarf.reg_data = + (** Compute the pre-dwarf data for a single subroutine/FDE *) + + (* A `call` always result in %rsp pointing to the RA *) + let init_cfa = + [(PreDwarf.(fde.start_pc), Regs.(RegOffset(X86_64.rsp, 8)))] in + let init_ra = + [(PreDwarf.(fde.start_pc), Regs.(RegOffset(DwRegCFA, -8)))] in + let init_reg_data = Regs.(RegMap.( + add DwRegRA init_ra + @@ add DwRegCFA init_cfa + @@ empty + )) + in + + let sub_cfg = BStd.Sub.to_cfg sub in + + assert false + +let compute_pre_dwarf proj: PreDwarf.pre_dwarf_data = + let prog = BStd.Project.program proj in + let subroutines = BStd.Term.enum BStd.sub_t prog in + let subdwarf = BStd.Seq.fold subroutines + ~init:PreDwarf.FdeMap.empty + ~f:(fun accu sub -> + let fde = fde_of_subroutine sub in + let predwarf = predwarf_of_sub fde sub in + PreDwarf.FdeMap.add fde predwarf accu) in + subdwarf diff --git a/DwarfSynth/PreDwarf.ml b/DwarfSynth/PreDwarf.ml new file mode 100644 index 0000000..2a318bf --- /dev/null +++ b/DwarfSynth/PreDwarf.ml @@ -0,0 +1,25 @@ +(** A program counter value *) +type pc = int + +(** A notice that the register save location has changed to this new value, + starting from the given pc *) +type reg_change = pc * Regs.reg_loc + +(** A structure holding, for each register, a list of changes. Used to track + the evolution of the storage location of every register. *) +type reg_data = reg_change list Regs.RegMap.t + +(** Represents a FDE (that is, morally, a function). *) +type fde = { + start_pc: pc; + end_pc: pc; + name: string; +} + +(** A map of FDEs *) +module FdeMap = Map.Make(struct + type t = fde + let compare = compare + end) + +type pre_dwarf_data = reg_data FdeMap.t diff --git a/DwarfSynth/Regs.ml b/DwarfSynth/Regs.ml new file mode 100644 index 0000000..8996340 --- /dev/null +++ b/DwarfSynth/Regs.ml @@ -0,0 +1,74 @@ +(** Defines everything related to registers. Architecture-independant. *) + +open Std + +module StrMap = Map.Make(String) + +(** A list of possible registers *) +type dwarf_reg = + DwReg of int (** A register identified by some ID *) + | DwRegCFA (** The Canonical Frame Address virtual register *) + | DwRegRA (** The Return Address virtual register *) + +(** A set of options of dwarf_reg. Useful to map BAP vars onto. *) +module DwRegOptSet = Set.Make(struct + type t = dwarf_reg option + let compare = compare + end) + +type mem_offset = int + +module RegMap = Map.Make(struct + type t=dwarf_reg + let compare r1 r2 = match (r1, r2) with + | DwReg(rid1), DwReg(rid2) -> compare rid1 rid2 + | r1, r2 -> Pervasives.compare r1 r2 + (* r1 and r2 are not both DwReg, so we can safely compare + them using the Pervasives compare *) + end) + +type reg_loc = RegOffset of dwarf_reg * mem_offset + +let is_register var = + let physical = BStd.Var.is_physical var in + let typ = BStd.Var.typ var in + let register = BStd.Type.(match typ with Imm(_) -> true | _ -> false) in + physical && register + +module X86_64 = struct + let rax = DwReg(0) + let rdx = DwReg(1) + let rcx = DwReg(2) + let rbx = DwReg(3) + let rsi = DwReg(4) + let rdi = DwReg(5) + let rbp = DwReg(6) + let rsp = DwReg(7) + let r8 = DwReg(8) + let r9 = DwReg(9) + let r10 = DwReg(10) + let r11 = DwReg(11) + let r12 = DwReg(12) + let r13 = DwReg(13) + let r14 = DwReg(14) + let r15 = DwReg(15) + let rip = DwReg(16) + + let name_map = StrMap.add "RSP" rsp StrMap.empty (* TODO *) + + let get_register reg = match is_register reg with + | false -> None + | true -> + (try Some (StrMap.find (BStd.Var.name reg) name_map) + with Not_found -> None) + + let of_var var = + match is_register var with + | false -> None + | true -> StrMap.find_opt (BStd.Var.name var) name_map + + let map_varset varset = + BStd.Var.Set.fold varset + ~init:DwRegOptSet.empty + ~f:(fun accu elt -> DwRegOptSet.add (of_var elt) accu) +end diff --git a/DwarfSynth/Simplest.ml b/DwarfSynth/Simplest.ml new file mode 100644 index 0000000..ca6f888 --- /dev/null +++ b/DwarfSynth/Simplest.ml @@ -0,0 +1,320 @@ +open Std +module CFG = BStd.Graphs.Ir + +type memory_offset = int64 + +type cfa_pos = + RspOffset of memory_offset + | RbpOffset of memory_offset + | CfaLostTrack + +type cfa_change = CfaChange of BStd.word * cfa_pos + +type cfa_changes_fde = cfa_change list + +module StrMap = Map.Make(String) +type cfa_changes = cfa_changes_fde StrMap.t + +module TIdMap = Map.Make(BStd.Tid) + +exception InvalidSub + +let pp_cfa_pos ppx = function + | RspOffset off -> Format.fprintf ppx "RSP + (%s)@." (Int64.to_string off) + | RbpOffset off -> Format.fprintf ppx "RBP + (%s)@." (Int64.to_string off) + | CfaLostTrack -> Format.fprintf ppx "??@." + +let pp_cfa_change ppx = function CfaChange(addr, cfa_pos) -> + Format.fprintf ppx "%a: %a" BStd.Word.pp_hex addr pp_cfa_pos cfa_pos + +let pp_cfa_changes_fde ppx = List.iter (pp_cfa_change ppx) + +let pp_cfa_changes ppx = + StrMap.iter (fun fde_name entry -> + Format.fprintf ppx "%s@\n====@\n@\n%a@." fde_name + pp_cfa_changes_fde entry) + +let pp_option_of sub_pp ppx = function + | None -> Format.fprintf ppx "None" + | Some x -> Format.fprintf ppx "Some %a" sub_pp x + +let addr_of term = + (** Get the address of a term *) + match BStd.Term.get_attr term BStd.address with + | None -> assert false + | Some addr -> addr + +let interpret_var_expr c_var offset expr = BStd.Bil.( + let closed_form = BStd.Exp.substitute + (var c_var) + (int (BStd.Word.of_int64 offset)) + expr + in + let res = BStd.Exp.eval closed_form in + match res with + | Imm value -> + Some (BStd.Word.to_int64_exn @@ BStd.Word.signed value) + | _ -> None + ) + +let process_def def (cur_offset: memory_offset) + : ((cfa_change * memory_offset) option) = + let lose_track addr = + Some (CfaChange(addr, CfaLostTrack), Int64.zero) + in + (match Regs.X86_64.of_var (BStd.Def.lhs def) with + | Some reg when reg = Regs.X86_64.rsp -> + let exp = BStd.Def.rhs def in + let free_vars = BStd.Exp.free_vars exp in + let free_x86_regs = Regs.X86_64.map_varset free_vars in + (match Regs.DwRegOptSet.cardinal free_x86_regs with + | 1 -> + let free_var = Regs.DwRegOptSet.choose free_x86_regs in + (match free_var with + | Some dw_var when dw_var = Regs.X86_64.rsp -> + let bil_var = (match BStd.Var.Set.choose free_vars with + | None -> assert false + | Some x -> x) in + let interpreted = interpret_var_expr bil_var cur_offset exp in + (match interpreted with + | None -> lose_track (addr_of def) + | Some interp_val -> + let gap = Int64.sub interp_val cur_offset in + let new_offset = Int64.sub cur_offset gap in + Some (CfaChange(addr_of def, RspOffset(new_offset)), new_offset) + ) + | _ -> lose_track (addr_of def) + ) + | _ -> lose_track @@ addr_of def + ) + | _ -> None) + +let process_jmp jmp (cur_offset: memory_offset) + : ((cfa_change * memory_offset) option) = + let gen_change off = + let new_offset = Int64.add cur_offset (Int64.of_int off) in + Some (CfaChange(addr_of jmp, RspOffset(new_offset)), new_offset) + in + + match (BStd.Jmp.kind jmp) with + | BStd.Call call -> gen_change (-8) + | BStd.Ret ret -> gen_change (8) + | _ -> None + +let sym_of_blk blk : cfa_changes_fde = + (** Extracts the symbolic CFA changes of a block. These changes assume that + at the beginning of the block, CFA = RspOffset(0) and will be offset + after *) + + let apply_offset (accu, cur_offset) = function + | None -> (accu, cur_offset) + | Some (change, n_offset) -> (change :: accu, n_offset) + in + + let fold_elt (accu, cur_offset) elt = match elt with + | `Def(def) -> + apply_offset (accu, cur_offset) @@ process_def def cur_offset + | `Jmp(jmp) -> + apply_offset (accu, cur_offset) @@ process_jmp jmp cur_offset + | _ -> (accu, cur_offset) + in + + let elts_seq = BStd.Blk.elts blk in + let out, end_offset = BStd.Seq.fold elts_seq + ~init:([], Int64.zero) + ~f:fold_elt in + out + +let end_offset (changelist: cfa_changes_fde): memory_offset option = + match changelist with + | CfaChange(_, RspOffset(x)) :: _ -> Some x + | _ -> None + +exception Inconsistent of BStd.tid + +let rec dfs_propagate changemap propagated parent_val node graph = + let c_tid = BStd.Term.tid @@ CFG.Node.label node in + match TIdMap.find_opt c_tid propagated with + | Some x -> + if x = parent_val then + (* Already propagated and consistent, all fine *) + propagated + else + (* Already propagated with a different value: inconsistency *) + raise (Inconsistent c_tid) + | None -> + let n_propagated = TIdMap.add c_tid parent_val propagated in + let outwards = CFG.Node.outputs node graph in + let self_entry = TIdMap.find c_tid changemap in + let offset = (match end_offset self_entry with + Some x -> x + | None -> Int64.zero) in + let cur_val = Int64.add parent_val offset in + BStd.Seq.fold outwards + ~init:n_propagated + ~f:(fun accu edge -> + dfs_propagate changemap accu cur_val (CFG.Edge.dst edge) graph) + +let get_entry_blk graph = + let entry = + BStd.Seq.find (CFG.nodes graph) + (fun node -> BStd.Seq.is_empty @@ CFG.Node.inputs node graph) + in match entry with + | None -> assert false + | Some x -> x + +let term_addr term = + BStd.Term.get_attr term BStd.address + +let same_keys map1 map2 = + let exists_in_other other key _ = + TIdMap.mem key other in + + TIdMap.for_all (exists_in_other map2) map1 + && TIdMap.for_all (exists_in_other map1) map2 + +let of_sub sub : cfa_changes_fde = + (** Extracts the `cfa_changes_fde` of a subroutine *) + + Format.eprintf "Sub %s...@." @@ BStd.Sub.name sub ; + + let initial_cfa_rsp_offset = Int64.of_int 8 in + let store_sym accu blk = + let blk = CFG.Node.label blk in + let res = sym_of_blk blk in + TIdMap.add (BStd.Term.tid blk) res accu + in + + let node_addr nd = term_addr @@ CFG.Node.label nd in + + let merge_corrected blk_tid changes offset = match (changes, offset) with + | Some changes, Some offset -> + Some ( + List.map (fun (CfaChange(addr, pos)) -> match pos with + RspOffset(off) -> CfaChange(addr, + RspOffset(Int64.add off offset)) + | RbpOffset(off) -> CfaChange(addr, RbpOffset(off)) + | CfaLostTrack -> CfaChange(addr, CfaLostTrack) + ) + changes + ) + | _ -> None + in + + let cfg = BStd.Sub.to_cfg sub in + + let tid_match = BStd.Seq.fold (CFG.nodes cfg) + ~init:TIdMap.empty + ~f:(fun accu node -> + let tid = BStd.Term.tid @@ CFG.Node.label node in + TIdMap.add tid node accu) + in + + let blk_sym = BStd.Seq.fold + ~init:TIdMap.empty + ~f:store_sym + @@ CFG.nodes cfg + in + + let entry_blk = get_entry_blk cfg in + let offset_map = dfs_propagate + blk_sym (TIdMap.empty) initial_cfa_rsp_offset entry_blk cfg in + + let corrected = TIdMap.merge merge_corrected blk_sym offset_map in + + let is_connex = same_keys tid_match corrected in + if not is_connex then + raise InvalidSub ; + + let tid_list = TIdMap.bindings tid_match in + let sorted_blk = List.sort (fun (tid1, bl1) (tid2, bl2) -> + let res = match (node_addr bl1, node_addr bl2) with + | Some addr1, Some addr2 -> compare addr1 addr2 + | Some _, None -> 1 + | None, Some _ -> -1 + | None, None -> compare tid1 tid2 + in + -res) + tid_list + in + + let out = List.fold_left + (fun accu blk -> + let changes = TIdMap.find blk corrected in + List.fold_left (fun accu chg -> chg::accu) + accu + changes + ) + [] + (List.map (fun (x, y) -> x) sorted_blk) in + + let sub_addr = (match term_addr sub with + | Some x -> x + | None -> assert false) in + let init = [ + CfaChange(sub_addr, RspOffset(initial_cfa_rsp_offset)) + ] in + init @ out + +let cleanup_fde (fde_changes: cfa_changes_fde) : cfa_changes_fde = + (** Cleanup the result of `of_sub`. + + Merges entries at the same address, propagates track lost *) + + let fold_one (accu, (last_commit:cfa_pos option), in_flight, lost_track) = function + | CfaChange(addr, cfa_change) as cur_change -> ( + match lost_track, in_flight, cfa_change with + | true, _, _ -> + (* Already lost track: give up *) + (accu, last_commit, None, lost_track) + | false, _, CfaLostTrack -> + (* Just lost track: give up the operation on flight as well *) + (cur_change :: accu, None, None, true) + | _, Some CfaChange(flight_addr, flight_chg), _ + when flight_addr = addr -> + (* On flight address matches current address: continue flying *) + accu, last_commit, Some cur_change, false + | _, Some CfaChange(_, in_flight_inner_pos), _ + when last_commit = Some in_flight_inner_pos -> + (* Doesn't match anymore, but there was some operation in flight, + which has the same result as what was last committed. Discard. *) + (accu, last_commit, Some cur_change, false) + | _, Some (CfaChange(_, in_flight_inner_pos) as in_flight_inner), _ -> + (* Doesn't match anymore, but there was some operation in flight: + commit it, put the new one in flight *) + (in_flight_inner :: accu, Some in_flight_inner_pos, + Some cur_change, false) + | _, None, _ -> + (* No operation in flight: put the new one in flight *) + (accu, last_commit, Some cur_change, false) + ) + in + + let extract_end_value (accu, _, in_flight, lost_track) = + List.rev @@ match lost_track, in_flight with + | true, _ -> accu + | false, None -> accu + | false, Some x -> x :: accu + in + + extract_end_value + @@ List.fold_left fold_one ([], None, None, false) fde_changes + + +let of_prog prog : cfa_changes = + (** Extracts the `cfa_changes` of a program *) + let fold_step accu sub = + (try + let res = (*cleanup_fde @@ *)of_sub sub in + StrMap.add (BStd.Sub.name sub) res accu + with InvalidSub -> accu) + in + let subroutines = BStd.Term.enum BStd.sub_t prog in + BStd.Seq.fold subroutines + ~init:StrMap.empty + ~f:fold_step + +let of_proj proj : cfa_changes = + (** Extracts the `cfa_changes` of a project *) + let prog = BStd.Project.program proj in + of_prog prog diff --git a/DwarfSynth/Std.ml b/DwarfSynth/Std.ml new file mode 100644 index 0000000..eb0bd71 --- /dev/null +++ b/DwarfSynth/Std.ml @@ -0,0 +1,3 @@ +module BStd = Bap.Std +module CKStd = Core_kernel.Std + diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..7d9b915 --- /dev/null +++ b/Makefile @@ -0,0 +1,18 @@ +OCAMLBUILD=bapbuild +BAPBUNDLE=bapbundle +ROOT_MODULE=dwarfsynth + + +all: install + +.PHONY: $(ROOT_MODULE).plugin +$(ROOT_MODULE).plugin: + $(OCAMLBUILD) $(ROOT_MODULE).plugin + +.PHONY: install +install: $(ROOT_MODULE).plugin + $(BAPBUNDLE) install $< + +.PHONY: clean +clean: + rm -rf _build diff --git a/_tags b/_tags new file mode 100644 index 0000000..606a539 --- /dev/null +++ b/_tags @@ -0,0 +1,8 @@ +true: bin_annot +"DwarfSynth": include +: for-pack(DwarfSynth) +: for-pack(DwarfSynth) +: for-pack(DwarfSynth) +: for-pack(DwarfSynth) +: for-pack(DwarfSynth) +: for-pack(DwarfSynth) diff --git a/dwarfsynth.ml b/dwarfsynth.ml new file mode 100644 index 0000000..4043df5 --- /dev/null +++ b/dwarfsynth.ml @@ -0,0 +1,26 @@ +(** dwarfsynth + * + * Entry point for the BAP plugin `dwarfsynth`, defining the command line + * interface + **) + +module Self = struct + include Bap.Std.Self() +end + +let main = DwarfSynth.Main.main + +module Cmdline = struct + module Cnf = Self.Config + + let outfile = Cnf.( + param (some string) "output" + ~doc:("The file in which the output ELF will be written. Output goes " + ^ "to stdout by default.") + ) + + let () = Cnf.( + when_ready ((fun {get=(!!)} -> + Bap.Std.Project.register_pass' (main !!outfile))) + ) +end diff --git a/dwarfsynth.plugin b/dwarfsynth.plugin new file mode 120000 index 0000000..617dc05 --- /dev/null +++ b/dwarfsynth.plugin @@ -0,0 +1 @@ +/home/tobast/cours/internship/src/dwarf-synthesis/_build/dwarfsynth.plugin \ No newline at end of file