open Std module CFG = BStd.Graphs.Ir type memory_offset = int64 type memory_address = int64 module AddrMap = Map.Make(Int64) module AddrSet = Set.Make(Int64) type cfa_pos = RspOffset of memory_offset | RbpOffset of memory_offset | CfaLostTrack type rbp_pos = | RbpUndef | RbpCfaOffset of memory_offset type reg_pos = cfa_pos * rbp_pos type reg_changes_fde = reg_pos AddrMap.t type subroutine_cfa_data = { reg_changes_fde: reg_changes_fde; beg_pos: memory_address; end_pos: memory_address; } type block_local_state = { rbp_vars: BStd.Var.Set.t; rbp_pop_set: BStd.Tid.Set.t } module StrMap = Map.Make(String) type subroutine_cfa_map = subroutine_cfa_data StrMap.t module TIdMap = Map.Make(BStd.Tid) exception InvalidSub exception UnexpectedRbpSet type synthesis_settings = { mutable no_rbp_undef: bool } let __settings = { no_rbp_undef = false } let pp_cfa_pos ppx = function | RspOffset off -> Format.fprintf ppx "RSP + (%s)" (Int64.to_string off) | RbpOffset off -> Format.fprintf ppx "RBP + (%s)" (Int64.to_string off) | CfaLostTrack -> Format.fprintf ppx "??@." let pp_rbp_pos ppx = function | RbpUndef -> Format.fprintf ppx "u" | RbpCfaOffset off -> Format.fprintf ppx "c%+Ld" off let pp_reg_pos ppx (cfa_pos, rbp_pos) = Format.fprintf ppx "(%a; %a)" pp_cfa_pos cfa_pos pp_rbp_pos rbp_pos let pp_int64_hex ppx number = let mask_short = Int64.(pred (shift_left one 16)) in let pp_short number = Format.fprintf ppx "%04x" Int64.(to_int (logand number mask_short)) in List.iter pp_short @@ List.map (fun x -> Int64.(shift_right number (16*x))) [3;2;1;0] let pp_cfa_changes_fde ppx cfa_changes = AddrMap.iter (fun addr change -> Format.fprintf ppx "%a: %a@." pp_int64_hex addr pp_cfa_pos change) cfa_changes let pp_cfa_changes ppx = StrMap.iter (fun fde_name entry -> Format.fprintf ppx "%s@\n====@\n@\n%a@." fde_name pp_cfa_changes_fde entry) let pp_option_of sub_pp ppx = function | None -> Format.fprintf ppx "None" | Some x -> Format.fprintf ppx "Some %a" sub_pp x let opt_addr_of term = (** Get the address of a term as an option, if it has one*) BStd.Term.get_attr term BStd.address let addr_of term = (** Get the address of a term *) match opt_addr_of term with | None -> assert false | Some addr -> addr let opt_addr_of_blk_elt = function | `Def def -> opt_addr_of def | `Jmp jmp -> opt_addr_of jmp | `Phi phi -> opt_addr_of phi let entrypoint_address blk = (** Find the first instruction address in the current block. Return None if no instruction has address. *) let fold_one accu cur_elt = match accu, opt_addr_of_blk_elt cur_elt with | None, None -> None | None, Some x -> Some x | _, _ -> accu in BStd.Seq.fold (BStd.Blk.elts blk) ~init:None ~f:fold_one let to_int64_addr addr = BStd.Word.to_int64_exn addr let int64_addr_of x = to_int64_addr @@ addr_of x let map_option f = function | None -> None | Some x -> Some (f x) let build_next_instr (disasm: BStd.disasm): AddrSet.t AddrMap.t = (** Build a map of memory_address -> AddrSet.t holding, for each address, the set of instructions coming right after the instruction at given address. There might be multiple such addresses, if the current instruction is at a point of branching. *) let rec build_of_instr_list cur_map = function (** Maps an instruction to its following instruction in this block *) | (cur_mem, cur_insn) :: ((next_mem, next_insn) as elt2) :: tl -> (* Its only successor is next_insn *) let new_map = (try let cur_addr = to_int64_addr @@ BStd.Memory.min_addr cur_mem and next_addr = to_int64_addr @@ BStd.Memory.min_addr next_mem in AddrMap.add cur_addr (AddrSet.singleton next_addr) cur_map with _ -> cur_map) in build_of_instr_list new_map (elt2 :: tl) | (cur_mem, _) :: [] -> let last_addr = (try Some (to_int64_addr @@ BStd.Memory.min_addr cur_mem) with _ -> None) in cur_map, last_addr (* Ignore the last one: its successors are held in the graph *) | [] -> cur_map, None in let cfg = BStd.Disasm.cfg disasm in let rec block_addresses block = (try BStd.Block.addr block |> to_int64_addr |> AddrSet.singleton with _ -> (* Probably an intermediary node, eg. JMP --> [inermed node] --> BLK *) let outputs = BStd.Graphs.Cfg.Node.outputs block cfg |> BStd.Seq.map ~f:BStd.Graphs.Cfg.Edge.dst in BStd.Seq.fold outputs ~init:AddrSet.empty ~f:(fun accu block -> AddrSet.union (block_addresses block) accu) ) in let build_of_block cur_map block = let cur_map, last_addr = build_of_instr_list cur_map (BStd.Block.insns block) in (match last_addr with | Some last_addr -> let following_set = BStd.Graphs.Cfg.Node.outputs block cfg |> BStd.Seq.fold ~init:AddrSet.empty ~f:(fun set edge -> AddrSet.union (block_addresses (BStd.Graphs.Cfg.Edge.dst edge)) set) in AddrMap.add last_addr following_set cur_map | None -> cur_map ) in BStd.Seq.fold (BStd.Graphs.Cfg.nodes cfg) ~init:AddrMap.empty ~f:build_of_block let find_rbp_pop_set cfg entry = (** Returns a BStd.Tid.Set.t of the terms actually "popping" %rbp, that is, the terms that should trigger a change to RbpUndef of the %rbp register. The current heuristic is to consider the expressions i) of the form `rbp = F(mem, rsp)` (alledgedly, rbp = something from the stack); ii) that are the last of this kind in the subroutine's CFG (ie. such that there is not another instruction matching (i) that is reachable through the CFG from the current instruction). *) let def_is_rbp_pop def = let is_pop_expr expr = let free_vars = BStd.Exp.free_vars expr in let free_x86_regs = Regs.X86_64.map_varset free_vars in (match Regs.DwRegOptSet.cardinal free_x86_regs with | 2 -> let reg = free_x86_regs |> Regs.DwRegOptSet.filter (fun x -> match x with None -> false | Some _ -> true) |> Regs.DwRegOptSet.choose in let has_mem_var = BStd.Var.Set.exists ~f:(fun x -> BStd.Var.name x = "mem") free_vars in (match reg, has_mem_var with | Some dw_var, true when dw_var = Regs.X86_64.rsp -> true | _ -> false) | _ -> false ) in (match Regs.X86_64.of_var (BStd.Def.lhs def), is_pop_expr @@ BStd.Def.rhs def with | Some reg, true when reg = Regs.X86_64.rbp -> true | _ -> false ) in let block_find_rbp_pop block = let fold_elt = function | `Def(def) when (def_is_rbp_pop def) -> Some (BStd.Term.tid def) | _ -> None in let elts_seq = BStd.Blk.elts block in let last_pop = BStd.Seq.fold elts_seq ~init:None ~f:(fun accu elt -> (match fold_elt elt with | None -> accu | Some tid -> Some tid)) in last_pop in let rec block_dfs node visited = (* DFS on the CFG to find rbp pops, and rule out those that are not final *) let block = CFG.Node.label node in (match BStd.Blk.Set.mem visited block with | true -> (* Loop: we already visited this node *) BStd.Tid.Set.empty, true, visited | false -> let visited = BStd.Blk.Set.add visited block in let pop_set, has_pop, visited = BStd.Seq.fold (CFG.Node.succs node cfg) ~f:(fun (pre_pop_set, pre_has_pop, visited) child -> let cur_pop_set, cur_has_pop, visited = block_dfs child visited in (BStd.Tid.Set.union pre_pop_set cur_pop_set), (pre_has_pop || cur_has_pop), visited ) ~init:(BStd.Tid.Set.empty, false, visited) in let pop_set, has_pop = (match has_pop with | false -> (* No rbp pop below, we seek rbp pops in this block *) (match block_find_rbp_pop block with | None -> pop_set, false | Some tid -> BStd.Tid.Set.add pop_set tid, true ) | true -> pop_set, has_pop) in pop_set, has_pop, visited ) in if __settings.no_rbp_undef then BStd.Tid.Set.empty else ( let pop_set, _, _ = block_dfs entry (BStd.Blk.Set.empty) in pop_set ) let interpret_var_expr c_var offset expr = BStd.Bil.( let closed_form = BStd.Exp.substitute (var c_var) (int (BStd.Word.of_int64 (Int64.neg offset))) expr in let res = BStd.Exp.eval closed_form in match res with | Imm value -> Some (Int64.neg @@ BStd.Word.to_int64_exn @@ BStd.Word.signed value) | _ -> None ) let is_single_free_reg expr = (** Detects whether `expr` contains a single free variable which is a machine register, and if so, extracts this register and returns a pair `(formal, reg`) of its formal variable in the expression and the register. Otherwise return None. *) let free_vars = BStd.Exp.free_vars expr in let free_x86_regs = Regs.X86_64.map_varset free_vars in (match Regs.DwRegOptSet.cardinal free_x86_regs with | 1 -> let free_var = Regs.DwRegOptSet.choose free_x86_regs in (match free_var with | Some dw_var -> let bil_var = (match BStd.Var.Set.choose free_vars with | None -> assert false | Some x -> x) in Some (bil_var, dw_var) | _ -> None ) | _ -> None ) let process_def (local_state: block_local_state) def (cur_reg: reg_pos) : (reg_pos option * block_local_state) = let lose_track = Some CfaLostTrack in let cur_cfa, cur_rbp = cur_reg in let out_cfa = (match cur_cfa, Regs.X86_64.of_var (BStd.Def.lhs def) with | RspOffset(cur_offset), Some reg when reg = Regs.X86_64.rsp -> let exp = BStd.Def.rhs def in (match is_single_free_reg exp with | Some (bil_var, dw_var) when dw_var = Regs.X86_64.rsp -> let interpreted = interpret_var_expr bil_var cur_offset exp in (match interpreted with | None -> lose_track | Some new_offset -> Some (RspOffset(new_offset)) ) | _ -> lose_track ) | RspOffset(cur_offset), Some reg when reg = Regs.X86_64.rbp -> (* We have CFA=rsp+k and a line %rbp <- [expr]. Might be a %rbp <- %rsp *) let exp = BStd.Def.rhs def in (match is_single_free_reg exp with | Some (bil_var, dw_var) when dw_var = Regs.X86_64.rsp -> (* We have %rbp := F(%rsp) *) (* FIXME we wish to have %rbp := %rsp. An ugly and non-robust test to check that would be interpret F(0), expecting that F is at worst affine - then a restult of 0 means that %rbp := %rsp + 0 *) let interpreted = interpret_var_expr bil_var (Int64.zero) exp in (match interpreted with | Some offset when offset = Int64.zero -> Some (RbpOffset(cur_offset)) | _ -> (* Previous instruction was rsp-indexed, here we put something weird in %rbp, let's keep indexing with rsp and do nothing *) None ) | _ -> None ) | RbpOffset(cur_offset), Some reg when reg = Regs.X86_64.rbp -> (* Assume we are overwriting %rbp with something — we must revert to some rsp-based indexing *) (* FIXME don't assume the rsp offset will always be 8, find a smart way to figure this out. We actually use offset 16 because the `pop` will occur after the value is read from the stack. *) Some (RspOffset(Int64.of_int 16)) | _ -> None ) in let is_rbp_save_expr expr local_state = let free_vars = BStd.Exp.free_vars expr in let card = BStd.Var.Set.length free_vars in let has_mem_var = BStd.Var.Set.exists ~f:(fun x -> BStd.Var.name x = "mem") free_vars in let free_x86_regs = Regs.X86_64.map_varset free_vars in let has_rsp_var = free_x86_regs |> Regs.DwRegOptSet.exists (fun x -> match x with | Some x when x = Regs.X86_64.rsp -> true | _ -> false) in let has_rbp_var = free_x86_regs |> Regs.DwRegOptSet.exists (fun x -> match x with | Some x when x = Regs.X86_64.rbp -> true | _ -> false) in let has_intermed_rbp_var = free_vars |> BStd.Var.Set.inter local_state.rbp_vars |> BStd.Var.Set.is_empty |> not in (card = 3 && has_mem_var && has_rsp_var && (has_rbp_var || has_intermed_rbp_var)) in let is_rbp_expr expr = let free_vars = BStd.Exp.free_vars expr in let free_x86_regs = Regs.X86_64.map_varset free_vars in (match Regs.DwRegOptSet.cardinal free_x86_regs with | 1 -> let reg = Regs.DwRegOptSet.choose free_x86_regs in (match reg with | Some dwreg when dwreg = Regs.X86_64.rbp -> true | _ -> false) | _ -> false) in let gather_rbp_intermed_var def cur_state = (* If `def` is `some intermed. var <- rbp`, add this information in the local state *) (match is_rbp_expr @@ BStd.Def.rhs def with | true -> let lhs_var = BStd.Def.lhs def in if (BStd.Var.is_virtual lhs_var && BStd.Var.typ lhs_var = BStd.reg64_t) then ( (* This `def` is actually of the type we want to store. *) let n_rbp_vars = BStd.Var.Set.add cur_state.rbp_vars lhs_var in { cur_state with rbp_vars = n_rbp_vars } ) else cur_state | false -> cur_state ) in let out_rbp, new_state = (match cur_rbp with | RbpUndef -> let cur_state = gather_rbp_intermed_var def local_state in (* We assume that an expression is saving %rbp on the stack at the address %rip when the current def is an expression of the kind `MEM <- F(MEM, %rip, v)` where `v` is either `%rbp` or some intermediary variable holding `%rbp`. This approach is sound when %rbp is saved using a `push`, but probably wrong when saved using a `mov` on some stack-space allocated previously (eg. for multiple registers saved at once). It would be far better to actually read the position at which `v` is saved, but this requires parsing the actual rhs expression, which is not easily done: FIXME *) let new_rbp = if (BStd.Var.name @@ BStd.Def.lhs def = "mem" && is_rbp_save_expr (BStd.Def.rhs def) cur_state) then (match cur_cfa with | RspOffset off -> Some (RbpCfaOffset (Int64.mul Int64.minus_one off)) | _ -> raise UnexpectedRbpSet ) else None in new_rbp, cur_state | RbpCfaOffset offs -> (* We go back to RbpUndef if the current def is in the rbp_pop_set -- see `find_rbp_pop_set` *) (match BStd.Tid.Set.mem (local_state.rbp_pop_set) @@ BStd.Term.tid def with | true -> Some RbpUndef, local_state | false -> None, local_state ) ) in (match out_cfa, out_rbp with | None, None -> None | Some cfa, None -> Some (cfa, cur_rbp) | None, Some rbp -> Some (cur_cfa, rbp) | Some cfa, Some rbp -> Some (cfa, rbp)), new_state let process_jmp jmp (cur_reg: reg_pos) : (reg_pos option) = let cur_cfa, cur_rbp = cur_reg in let gen_change = match cur_cfa with | RspOffset cur_offset -> (fun off -> let new_offset = Int64.add cur_offset (Int64.of_int off) in Some (RspOffset(new_offset), cur_rbp) ) | _ -> (fun _ -> None) in match (BStd.Jmp.kind jmp) with | BStd.Call call -> gen_change (-8) | BStd.Ret ret -> gen_change (8) | _ -> None let process_blk next_instr_graph rbp_pop_set (block_init: reg_pos) blk : (reg_changes_fde * reg_pos) = (** Extracts the registers (CFA+RBP) changes of a block. *) let apply_offset cur_addr_opt ((accu:reg_changes_fde), cur_reg, local_state) = function | None -> (accu, cur_reg, local_state) | Some reg_pos -> let cur_addr = (match cur_addr_opt with | None -> assert false | Some x -> to_int64_addr x) in (AddrSet.fold (fun n_addr cur_accu -> AddrMap.add n_addr reg_pos cur_accu) (AddrMap.find cur_addr next_instr_graph) accu), reg_pos, local_state in let fold_elt (accu, cur_reg, cur_local_state) elt = match elt with | `Def(def) -> let new_offset, new_state = process_def cur_local_state def cur_reg in apply_offset (opt_addr_of def) (accu, cur_reg, new_state) new_offset | `Jmp(jmp) -> apply_offset (opt_addr_of jmp) (accu, cur_reg, cur_local_state) @@ process_jmp jmp cur_reg | _ -> (accu, cur_reg, cur_local_state) in let init_changes = (match opt_addr_of blk with | None -> AddrMap.empty | Some x -> let blk_address = to_int64_addr x in AddrMap.singleton blk_address block_init ) in let empty_local_state = { rbp_vars = BStd.Var.Set.empty; rbp_pop_set = rbp_pop_set } in let elts_seq = BStd.Blk.elts blk in let out_reg, end_reg, _ = BStd.Seq.fold elts_seq ~init:(init_changes, block_init, empty_local_state) ~f:fold_elt in out_reg, end_reg exception Inconsistent of BStd.tid let get_entry_blk graph = let entry = BStd.Seq.min_elt (CFG.nodes graph) ~cmp:(fun x y -> let ax = opt_addr_of @@ CFG.Node.label x and ay = opt_addr_of @@ CFG.Node.label y in match ax, ay with | None, None -> compare x y | Some _, None -> -1 | None, Some _ -> 1 | Some ax, Some ay -> compare (to_int64_addr ax) (to_int64_addr ay)) in match entry with | None -> assert false | Some x -> x let find_last_addr sub = (** Finds the maximal instruction address in a subroutine *) let map_opt fl fr merge l r = match l, r with | None, None -> None | Some x, None -> Some (fl x) | None, Some y -> Some (fr y) | Some x, Some y -> Some (merge (fl x) (fr y)) in let max_opt_addr_word = map_opt (fun x -> x) (fun y -> to_int64_addr y) max in let max_opt_addr = map_opt (fun x -> x) (fun y -> y) max in let max_def cur_max def = max_opt_addr_word cur_max (opt_addr_of def) in let fold_res = BStd.Seq.fold (BStd.Term.enum BStd.blk_t sub) ~init:None ~f:(fun cur_max blk -> max_opt_addr (BStd.Seq.fold (BStd.Term.enum BStd.def_t blk) ~init:cur_max ~f:max_def) (BStd.Seq.fold (BStd.Term.enum BStd.jmp_t blk) ~init:cur_max ~f:max_def) ) in match fold_res with | None -> Int64.zero | Some x -> x let cleanup_fde (fde_changes: reg_changes_fde) : reg_changes_fde = (** Cleanup the result of `of_sub`. Merges entries at the same address, propagates track lost *) let fold_one addr reg_change (accu, last_change, lost_track) = match reg_change, last_change, lost_track with | _, _, true -> (accu, None, lost_track) | (CfaLostTrack, _), _, false -> (AddrMap.add addr reg_change accu, None, true) | reg_change, Some prev_change, false when reg_change = prev_change -> (accu, last_change, false) | reg_change, _, false -> (AddrMap.add addr reg_change accu, Some reg_change, false) in match AddrMap.fold fold_one fde_changes (AddrMap.empty, None, false) with | out, _, _ -> out let process_sub sub next_instr_graph : subroutine_cfa_data = (** Extracts the `cfa_changes_fde` of a subroutine *) let cfg = BStd.Sub.to_cfg sub in let first_addr = int64_addr_of sub in let last_addr = find_last_addr sub in let initial_cfa_rsp_offset = Int64.of_int 8 in let entry_blk = get_entry_blk cfg in let rbp_pop_set = find_rbp_pop_set cfg entry_blk in let rec dfs_process (sub_changes: (reg_changes_fde * reg_pos) TIdMap.t) node (entry_offset: reg_pos) = (** Processes one block *) let cur_blk = CFG.Node.label node in let tid = BStd.Term.tid @@ cur_blk in match (TIdMap.find_opt tid sub_changes) with | None -> (* Not yet visited: compute the changes *) let cur_blk_changes, end_reg = process_blk next_instr_graph rbp_pop_set entry_offset cur_blk in let n_sub_changes = TIdMap.add tid (cur_blk_changes, entry_offset) sub_changes in BStd.Seq.fold (CFG.Node.succs node cfg) ~f:(fun accu child -> dfs_process accu child end_reg) ~init:n_sub_changes | Some (_, former_entry_offset) -> (* Already visited: check that entry values are matching *) if entry_offset <> former_entry_offset then ( Format.eprintf "Found inconsistency (0x%Lx): %a -- %a@." (int64_addr_of cur_blk) pp_reg_pos entry_offset pp_reg_pos former_entry_offset ; raise (Inconsistent tid) ) else sub_changes in let initial_offset = (RspOffset initial_cfa_rsp_offset, RbpUndef) in let changes_map = dfs_process TIdMap.empty entry_blk initial_offset in let merged_changes = TIdMap.fold (fun _ (cfa_changes, _) accu -> AddrMap.union (fun _ v1 v2 -> if v1 = v2 then Some v1 else assert false) cfa_changes accu) changes_map AddrMap.empty in let reg_changes = cleanup_fde merged_changes in let output = { reg_changes_fde = reg_changes ; beg_pos = first_addr ; end_pos = last_addr ; } in output let of_prog prog next_instr_graph : subroutine_cfa_map = (** Extracts the `cfa_changes` of a program *) let fold_step accu sub = (try let subroutine_data = process_sub sub next_instr_graph in StrMap.add (BStd.Sub.name sub) subroutine_data accu with | InvalidSub -> accu | Inconsistent tid -> Format.eprintf "Inconsistent TId %a in subroutine %s, skipping.@." BStd.Tid.pp tid (BStd.Sub.name sub); accu ) in let subroutines = BStd.Term.enum BStd.sub_t prog in BStd.Seq.fold subroutines ~init:StrMap.empty ~f:fold_step let of_proj no_rbp_undef proj : subroutine_cfa_map = (** Extracts the `cfa_changes` of a project *) __settings.no_rbp_undef <- no_rbp_undef ; let next_instr_graph = build_next_instr (BStd.Project.disasm proj) in let prog = BStd.Project.program proj in of_prog prog next_instr_graph let clean_lost_track_subs pre_dwarf : subroutine_cfa_map = (** Removes the subroutines on which we lost track from [pre_dwarf] *) let sub_lost_track sub_name (sub: subroutine_cfa_data) = not @@ AddrMap.exists (fun addr pos -> let cfa_pos, _ = pos in (match cfa_pos with | RspOffset _ | RbpOffset _ -> false | CfaLostTrack -> true)) sub.reg_changes_fde in StrMap.filter sub_lost_track pre_dwarf