Add uncommitted tests and curiosities

Check_gen: patch for flat vs pyramid clang tables
clang generates tables like this LOC CFA rbx rbp r14 r15 ra 00000000000028a0 rsp+8 u u u u c-8 00000000000028a1 rsp+16 u u u u c-8 00000000000028a3 rsp+24 u u u u c-8 00000000000028a5 rsp+32 u u u u c-8 00000000000028a6 rsp+40 u u u u c-8 00000000000028aa rsp+64 c-40 c-16 c-32 c-24 c-8
2019-04-05 16:39:21 +02:00 · 2019-04-05 16:32:00 +02:00 · 2019-04-05 11:23:18 +02:00 · 2019-04-05 11:23:13 +02:00 · 2019-04-04 19:47:36 +02:00 · 2019-04-04 13:49:39 +02:00
13 changed files with 5754 additions and 125 deletions
--- a/DwarfSynth/Main.ml
+++ b/DwarfSynth/Main.ml
@ -1,8 +1,8 @@
 open Std
-let main outfile proj =
+let main ?no_rbp_undef:(no_rbp_undef=false) outfile proj =
  let pre_dwarf = proj
-                  |> Simplest.of_proj
+                  |> Simplest.of_proj no_rbp_undef
                  |> Simplest.clean_lost_track_subs in
  Format.printf "%a" Frontend.pp_pre_dwarf_readelf pre_dwarf;
  let pre_c_dwarf = PreCBinding.convert_pre_c pre_dwarf in
--- a/DwarfSynth/Simplest.ml
+++ b/DwarfSynth/Simplest.ml
@ -27,7 +27,8 @@ type subroutine_cfa_data = {
 }
 type block_local_state = {
-  rbp_vars: BStd.Var.Set.t
+  rbp_vars: BStd.Var.Set.t;
  rbp_pop_set: BStd.Tid.Set.t
 }
 module StrMap = Map.Make(String)
@ -38,11 +39,26 @@ module TIdMap = Map.Make(BStd.Tid)
 exception InvalidSub
 exception UnexpectedRbpSet
 (* Process-wide synthesis options. The field is mutable so that it can be
    assigned after module initialisation. *)
 type synthesis_settings = {
    mutable no_rbp_undef: bool
  }
 (* Current synthesis options; [no_rbp_undef] starts out disabled. *)
 let __settings = {
  no_rbp_undef = false
 }
 let pp_cfa_pos ppx cfa_pos =
  (** Prints a CFA position on formatter [ppx]: the base register followed by
      its offset in decimal, or a "??" marker when track was lost. *)
  match cfa_pos with
  | CfaLostTrack -> Format.fprintf ppx "??@."
  | RbpOffset off -> Format.fprintf ppx "RBP + (%s)" (Int64.to_string off)
  | RspOffset off -> Format.fprintf ppx "RSP + (%s)" (Int64.to_string off)
 let pp_rbp_pos ppx rbp_pos =
  (** Prints the %rbp rule on formatter [ppx]: "u" when undefined, otherwise
      "c" followed by the signed offset from the CFA. *)
  match rbp_pos with
  | RbpCfaOffset off -> Format.fprintf ppx "c%+Ld" off
  | RbpUndef -> Format.fprintf ppx "u"
 let pp_reg_pos ppx reg_pos =
  (** Prints a (CFA position, %rbp rule) pair as "(cfa; rbp)". *)
  let cfa_pos, rbp_pos = reg_pos in
  Format.fprintf ppx "(%a; %a)" pp_cfa_pos cfa_pos pp_rbp_pos rbp_pos
 let pp_int64_hex ppx number =
  let mask_short = Int64.(pred (shift_left one 16)) in
  let pp_short number =
@ -82,6 +98,15 @@ let opt_addr_of_blk_elt = function
  | `Jmp jmp -> opt_addr_of jmp
  | `Phi phi -> opt_addr_of phi
 let is_ghost_sub sub =
  (** Tells whether the subroutine is a "ghost", ie. whether every one of its
      blocks is devoid of elements (a subroutine without blocks qualifies) *)
  let block_has_no_elts blk = BStd.Seq.is_empty (BStd.Blk.elts blk) in
  sub
  |> BStd.Term.enum BStd.blk_t
  |> BStd.Seq.for_all ~f:block_has_no_elts
 let entrypoint_address blk =
  (** Find the first instruction address in the current block.
      Return None if no instruction has address.  *)
@ -104,79 +129,195 @@ let map_option f = function
  | None -> None
  | Some x -> Some (f x)
-let build_next_instr graph =
+exception Block_not_in_subroutine
 let build_next_instr sub_ranges (disasm: BStd.disasm): AddrSet.t AddrMap.t =
  (** Build a map of memory_address -> AddrSet.t holding, for each address, the
      set of instructions coming right after the instruction at given address.
      There might be multiple such addresses, if the current instruction is at
      a point of branching.  *)
-  let addresses_in_block blk =
+  let rec build_of_instr_list cur_map = function
-    (** Set of addresses present in the block *)
+    (** Maps an instruction to its following instruction in this block *)
-    BStd.Seq.fold (BStd.Blk.elts blk)
+    | (cur_mem, cur_insn) :: ((next_mem, next_insn) as elt2) :: tl ->
-      ~init:AddrSet.empty
+      (* Its only successor is next_insn *)
-      ~f:(fun accu elt ->
+      let new_map =
-          let addr = opt_addr_of_blk_elt elt in
+        (try
-          match addr with
+           let cur_addr = to_int64_addr @@ BStd.Memory.min_addr cur_mem
-          | None -> accu
+           and next_addr = to_int64_addr @@ BStd.Memory.min_addr next_mem in
-          | Some x ->
+           AddrMap.add cur_addr (AddrSet.singleton next_addr) cur_map
-            (try
+         with _ -> cur_map)
-               AddrSet.add (BStd.Word.to_int64_exn x) accu
+        in
-             with _ -> accu)
+        build_of_instr_list new_map (elt2 :: tl)
-        )
+    | (cur_mem, cur_insn) :: [] ->
      let last_addr =
        (try Some (to_int64_addr @@ BStd.Memory.min_addr cur_mem)
         with _ -> None) in
      cur_map, last_addr
      (* Ignore the last one: its successors are held in the graph *)
    | [] ->
      cur_map, None
  in
-  let node_successors_addr (nd: CFG.node) : AddrSet.t =
+  let cfg = BStd.Disasm.cfg disasm in
    let rec do_find_succ accu nd =
      let fold_one accu c_node =
        match entrypoint_address (CFG.Node.label c_node) with
        | Some addr ->
          (try
             AddrSet.add (BStd.Word.to_int64_exn addr) accu
           with _ -> accu)
        | None -> do_find_succ accu c_node
      in
-      let succ = CFG.Node.succs nd graph in
+  let rec block_addresses block =
-      BStd.Seq.fold succ
+    (try BStd.Block.addr block
-        ~init:accu
+         |> to_int64_addr
-        ~f:fold_one
+         |> AddrSet.singleton
-    in
+     with _ ->
-    do_find_succ AddrSet.empty nd
+       (* Probably an intermediary node, eg. JMP --> [inermed node] --> BLK *)
       let outputs = BStd.Graphs.Cfg.Node.outputs block cfg
                     |> BStd.Seq.map ~f:BStd.Graphs.Cfg.Edge.dst in
       BStd.Seq.fold outputs
         ~init:AddrSet.empty
         ~f:(fun accu block -> AddrSet.union (block_addresses block) accu)
    )
  in
-  let build_of_block accu_map node =
+  let build_of_block cur_map block =
-    let blk = CFG.Node.label node in
+    (try
-    let node_successors = node_successors_addr node in
+       (* First, check that this block belongs to a subroutine *)
-    let instr_addresses = AddrSet.elements @@ addresses_in_block blk in
+       let block_first_address = (
         try
           to_int64_addr @@ BStd.Block.addr block
         with _ -> raise Block_not_in_subroutine) in
       let sub_first_addr, sub_last_addr = (
         try AddrMap.find_last
               (fun start_addr -> start_addr <= block_first_address) sub_ranges
         with Not_found ->
           raise Block_not_in_subroutine
       ) in
-    let rec accumulate_mappings mappings addr_list = function
+       (* Add the sequence of instuctions inside the block itself *)
-      | None -> mappings
+       let cur_map, last_addr =
-      | Some (instr, instr_seq) as cur_instr ->
+         build_of_instr_list cur_map (BStd.Block.insns block) in
-        let instr_addr = opt_addr_of_blk_elt instr in
+
-        match (map_option to_int64_addr instr_addr), addr_list with
+       (* Then the set of possible destinations for the block terminator *)
-        | None, _ ->
+       (match last_addr with
-          accumulate_mappings mappings addr_list @@ BStd.Seq.next instr_seq
+        | Some last_addr ->
-        | Some cur_addr, next_addr::t when cur_addr >= next_addr ->
+          let following_set = BStd.Graphs.Cfg.Node.outputs block cfg
-          accumulate_mappings mappings t cur_instr
+                              |> BStd.Seq.fold
-        | Some cur_addr, next_addr::_ ->
+                                ~init:AddrSet.empty
-          let n_mappings = AddrMap.add
+                                ~f:(fun set edge -> AddrSet.union
-              cur_addr (AddrSet.singleton next_addr) mappings in
+                                       (block_addresses
-          accumulate_mappings n_mappings addr_list @@ BStd.Seq.next instr_seq
+                                          (BStd.Graphs.Cfg.Edge.dst edge))
-        | Some cur_addr, [] ->
+                                       set)
-          let n_mappings = AddrMap.add
+                              |> AddrSet.filter (fun addr ->
-              cur_addr node_successors mappings in
+                                  sub_first_addr <= addr
-          accumulate_mappings n_mappings addr_list @@ BStd.Seq.next instr_seq
+                                  && addr <= sub_last_addr)
-    in
+                                (* ^ We must ensure the landing address belongs
-    accumulate_mappings
+                                   to the current subroutine for our purpose *)
-      accu_map
+          in
-      instr_addresses
+          AddrMap.add last_addr following_set cur_map
-      (BStd.Seq.next @@ BStd.Blk.elts blk)
+        | None -> cur_map
       )
     with Block_not_in_subroutine ->
       cur_map
    )
  in
-  BStd.Seq.fold (CFG.nodes graph)
+  BStd.Seq.fold (BStd.Graphs.Cfg.nodes cfg)
    ~init:AddrMap.empty
    ~f:build_of_block
 let find_rbp_pop_set cfg entry =
  (** Returns a BStd.Tid.Set.t of the terms actually "popping" %rbp, that is,
      the terms that should trigger a change to RbpUndef of the %rbp register.
      The current heuristic is to consider the expressions
       i) of the form `rbp = F(mem, rsp)` (allegedly, rbp = something from the
        stack);
      ii) that are the last of this kind in the subroutine's CFG (ie. such that
        there is not another instruction matching (i) that is reachable through
        the CFG from the current instruction).

      [cfg] is the graph that gets walked, [entry] the node the walk starts
      from. When [__settings.no_rbp_undef] is set, the empty set is returned
      and the graph is not walked at all.
  *)
  let def_is_rbp_pop def =
    (* Criterion (i): the definition assigns %rbp and its right-hand side
       passes [is_pop_expr]. *)
    let is_pop_expr expr =
      let free_vars = BStd.Exp.free_vars expr in
      let free_x86_regs = Regs.X86_64.map_varset free_vars in
      (* Exactly two distinct entries once mapped to registers -- presumably
         one actual register plus None for the non-register variables (such
         as mem); TODO confirm against Regs.X86_64.map_varset. *)
      (match Regs.DwRegOptSet.cardinal free_x86_regs with
       | 2 ->
         (* Keep only the entries that are actual registers, and pick one *)
         let reg = free_x86_regs
                   |> Regs.DwRegOptSet.filter
                     (fun x -> match x with None -> false | Some _ -> true)
                   |> Regs.DwRegOptSet.choose in
         let has_mem_var = BStd.Var.Set.exists
             ~f:(fun x -> BStd.Var.name x = "mem")
             free_vars in
         (* Accept only when the picked register is %rsp and a variable named
            "mem" occurs among the free variables *)
         (match reg, has_mem_var with
          | Some dw_var, true when dw_var = Regs.X86_64.rsp -> true
          | _ -> false)
       | _ -> false
      )
    in
    (match Regs.X86_64.of_var (BStd.Def.lhs def),
           is_pop_expr @@ BStd.Def.rhs def with
    | Some reg, true when reg = Regs.X86_64.rbp -> true
    | _ -> false
    )
  in
  let block_find_rbp_pop block =
    (* Tid of the last definition of the block matching [def_is_rbp_pop], or
       None if the block has no such definition. *)
    let fold_elt = function
      | `Def(def) when (def_is_rbp_pop def) -> Some (BStd.Term.tid def)
      | _ -> None
    in
    let elts_seq = BStd.Blk.elts block in
    (* Later matches overwrite earlier ones, hence the last one is kept *)
    let last_pop = BStd.Seq.fold elts_seq
        ~init:None
        ~f:(fun accu elt ->
            (match fold_elt elt with
             | None -> accu
             | Some tid -> Some tid))
    in
    last_pop
  in
  let rec block_dfs node visited =
    (* DFS on the CFG to find rbp pops, and rule out those that are not final.
       Returns (pop_set, has_pop, visited): the pops retained so far, whether a
       pop is considered to exist at or below [node], and the updated set of
       visited blocks.
     *)
    let block = CFG.Node.label node in
    (match BStd.Blk.Set.mem visited block with
    | true ->
      (* Loop: we already visited this node *)
      (* has_pop is reported as true here, so a block whose successor was
         already visited does not record a pop of its own. *)
      BStd.Tid.Set.empty, true, visited
    | false ->
      let visited = BStd.Blk.Set.add visited block in
      (* Children first: merge their pop sets, threading [visited] through *)
      let pop_set, has_pop, visited =
        BStd.Seq.fold (CFG.Node.succs node cfg)
          ~f:(fun (pre_pop_set, pre_has_pop, visited) child ->
              let cur_pop_set, cur_has_pop, visited =
                block_dfs child visited in
              (BStd.Tid.Set.union pre_pop_set cur_pop_set),
              (pre_has_pop || cur_has_pop),
              visited
            )
          ~init:(BStd.Tid.Set.empty, false, visited)
      in
      let pop_set, has_pop = (match has_pop with
          | false -> (* No rbp pop below, we seek rbp pops in this block *)
            (match block_find_rbp_pop block with
                | None -> pop_set, false
                | Some tid -> BStd.Tid.Set.add pop_set tid, true
            )
          | true -> pop_set, has_pop) in
      pop_set, has_pop, visited
    )
  in
  (* With no_rbp_undef, no term is ever allowed to reset %rbp to undefined *)
  if __settings.no_rbp_undef then
    BStd.Tid.Set.empty
  else (
    let pop_set, _, _ =
      block_dfs entry (BStd.Blk.Set.empty) in
    pop_set
  )
 let interpret_var_expr c_var offset expr = BStd.Bil.(
    let closed_form = BStd.Exp.substitute
      (var c_var)
@ -212,13 +353,14 @@ let is_single_free_reg expr =
  )
 let process_def (local_state: block_local_state) def (cur_reg: reg_pos)
    allow_rbp
  : (reg_pos option * block_local_state) =
  let lose_track = Some CfaLostTrack in
  let cur_cfa, cur_rbp = cur_reg in
  let out_cfa =
-    (match cur_cfa, Regs.X86_64.of_var (BStd.Def.lhs def) with
+    (match cur_cfa, Regs.X86_64.of_var (BStd.Def.lhs def), allow_rbp with
-     | RspOffset(cur_offset), Some reg when reg = Regs.X86_64.rsp ->
+     | RspOffset(cur_offset), Some reg, _ when reg = Regs.X86_64.rsp ->
       let exp = BStd.Def.rhs def in
       (match is_single_free_reg exp with
        | Some (bil_var, dw_var) when dw_var = Regs.X86_64.rsp ->
@ -230,7 +372,7 @@ let process_def (local_state: block_local_state) def (cur_reg: reg_pos)
          )
        | _ -> lose_track
       )
-     | RspOffset(cur_offset), Some reg when reg = Regs.X86_64.rbp ->
+     | RspOffset(cur_offset), Some reg, true when reg = Regs.X86_64.rbp ->
       (* We have CFA=rsp+k and a line %rbp <- [expr].
          Might be a %rbp <- %rsp *)
       let exp = BStd.Def.rhs def in
@ -251,7 +393,7 @@ let process_def (local_state: block_local_state) def (cur_reg: reg_pos)
          )
        | _ -> None
       )
-     | RbpOffset(cur_offset), Some reg when reg = Regs.X86_64.rbp ->
+     | RbpOffset(cur_offset), Some reg, true when reg = Regs.X86_64.rbp ->
       (* Assume we are overwriting %rbp with something — we must revert to
          some rsp-based indexing *)
       (* FIXME don't assume the rsp offset will always be 8, find a smart way
@ -260,6 +402,7 @@ let process_def (local_state: block_local_state) def (cur_reg: reg_pos)
          value is read from the stack.
       *)
       Some (RspOffset(Int64.of_int 16))
     | RbpOffset _, _, false -> assert false
     | _ -> None
    ) in
@ -288,25 +431,6 @@ let process_def (local_state: block_local_state) def (cur_reg: reg_pos)
     (has_rbp_var || has_intermed_rbp_var))
  in
  let is_pop_expr expr =
    let free_vars = BStd.Exp.free_vars expr in
    let free_x86_regs = Regs.X86_64.map_varset free_vars in
    (match Regs.DwRegOptSet.cardinal free_x86_regs with
     | 2 ->
       let reg = free_x86_regs
                 |> Regs.DwRegOptSet.filter
                   (fun x -> match x with None -> false | Some _ -> true)
                 |> Regs.DwRegOptSet.choose in
       let has_mem_var = BStd.Var.Set.exists
         ~f:(fun x -> BStd.Var.name x = "mem")
         free_vars in
       (match reg, has_mem_var with
        | Some dw_var, true when dw_var = Regs.X86_64.rsp -> true
        | _ -> false)
     | _ -> false
    )
  in
  let is_rbp_expr expr =
    let free_vars = BStd.Exp.free_vars expr in
    let free_x86_regs = Regs.X86_64.map_varset free_vars in
@ -369,13 +493,14 @@ let process_def (local_state: block_local_state) def (cur_reg: reg_pos)
       new_rbp, cur_state
     | RbpCfaOffset offs ->
-       (* We go back to RbpUndef when encountering something like a `pop rbp`,
+       (* We go back to RbpUndef if the current def is in the rbp_pop_set --
-          that is, RBP <- f(RSP, mem) *)
+          see `find_rbp_pop_set` *)
-       (match Regs.X86_64.of_var (BStd.Def.lhs def),
+
-              is_pop_expr @@ BStd.Def.rhs def with
+       (match BStd.Tid.Set.mem (local_state.rbp_pop_set) @@ BStd.Term.tid def
-       | Some reg, true when reg = Regs.X86_64.rbp ->
+            with
       | true ->
         Some RbpUndef, local_state
-       | _ -> None, local_state
+       | false -> None, local_state
       )
    )
  in
@ -404,7 +529,8 @@ let process_jmp jmp (cur_reg: reg_pos)
  | _ -> None
 let process_blk
-    next_instr_graph (block_init: reg_pos) blk : (reg_changes_fde * reg_pos) =
+    next_instr_graph rbp_pop_set allow_rbp (block_init: reg_pos) blk
  : (reg_changes_fde * reg_pos) =
  (** Extracts the registers (CFA+RBP) changes of a block. *)
  let apply_offset cur_addr_opt ((accu:reg_changes_fde), cur_reg, local_state)
@ -424,7 +550,8 @@ let process_blk
  let fold_elt (accu, cur_reg, cur_local_state) elt = match elt with
    | `Def(def) ->
-      let new_offset, new_state = process_def cur_local_state def cur_reg in
+      let new_offset, new_state =
        process_def cur_local_state def cur_reg allow_rbp in
      apply_offset
        (opt_addr_of def) (accu, cur_reg, new_state) new_offset
    | `Jmp(jmp) ->
@ -442,7 +569,8 @@ let process_blk
    ) in
  let empty_local_state = {
-    rbp_vars = BStd.Var.Set.empty
+    rbp_vars = BStd.Var.Set.empty;
    rbp_pop_set = rbp_pop_set
  } in
  let elts_seq = BStd.Blk.elts blk in
  let out_reg, end_reg, _ = BStd.Seq.fold elts_seq
@ -525,18 +653,21 @@ let cleanup_fde (fde_changes: reg_changes_fde) : reg_changes_fde =
  match AddrMap.fold fold_one fde_changes (AddrMap.empty, None, false) with
  | out, _, _ -> out
-let process_sub sub : subroutine_cfa_data =
+let process_sub sub next_instr_graph : subroutine_cfa_data =
  (** Extracts the `cfa_changes_fde` of a subroutine *)
  let cfg = BStd.Sub.to_cfg sub in
  let next_instr_graph = build_next_instr cfg in
  let first_addr = int64_addr_of sub in
  let last_addr = find_last_addr sub in
  let initial_cfa_rsp_offset = Int64.of_int 8 in
  let entry_blk = get_entry_blk cfg in
  let rbp_pop_set = find_rbp_pop_set cfg entry_blk in
  let rec dfs_process
      allow_rbp
      (sub_changes: (reg_changes_fde * reg_pos) TIdMap.t)
      node
      (entry_offset: reg_pos) =
@ -549,23 +680,56 @@ let process_sub sub : subroutine_cfa_data =
    | None ->
      (* Not yet visited: compute the changes *)
      let cur_blk_changes, end_reg =
-        process_blk next_instr_graph entry_offset cur_blk in
+        process_blk next_instr_graph rbp_pop_set
          allow_rbp entry_offset cur_blk in
      let n_sub_changes =
        TIdMap.add tid (cur_blk_changes, entry_offset) sub_changes in
      BStd.Seq.fold (CFG.Node.succs node cfg)
-        ~f:(fun accu child -> dfs_process accu child end_reg)
+        ~f:(fun accu child -> dfs_process allow_rbp accu child end_reg)
        ~init:n_sub_changes
    | Some (_, former_entry_offset) ->
      (* Already visited: check that entry values are matching *)
-      if entry_offset <> former_entry_offset then
+      if entry_offset <> former_entry_offset then (
        if allow_rbp then
          Format.eprintf "Found inconsistency (0x%Lx): %a -- %a@."
            (int64_addr_of cur_blk)
            pp_reg_pos entry_offset pp_reg_pos former_entry_offset ;
        raise (Inconsistent tid)
      )
      else
        sub_changes
  in
-  let entry_blk = get_entry_blk cfg in
+  let with_rbp_if_needed initial_offset =
    (* Tries first without allowing CFA=rbp+k, then allowing it if the first
       result was either inconsistent or lost track *)
    let not_losing_track synth_result =
      let lost_track = TIdMap.exists
          (fun _ (_, (cfa_pos, _)) -> match cfa_pos with
             | CfaLostTrack -> true
             | _ -> false) synth_result
      in
      (match lost_track with
       | true -> None
       | false -> Some synth_result)
    in
    let without_rbp =
      (try
         dfs_process false TIdMap.empty entry_blk initial_offset
         |> not_losing_track
       with Inconsistent _ -> None
      )
    in
    (match without_rbp with
     | Some correct_res -> correct_res
     | None ->
       dfs_process true TIdMap.empty entry_blk initial_offset)
  in
  let initial_offset = (RspOffset initial_cfa_rsp_offset, RbpUndef) in
-  let changes_map = dfs_process TIdMap.empty entry_blk initial_offset in
+  (* Try first without rbp, then with rbp upon failure *)
  let changes_map = with_rbp_if_needed initial_offset in
  let merged_changes = TIdMap.fold
      (fun _ (cfa_changes, _) accu -> AddrMap.union (fun _ v1 v2 ->
@ -587,12 +751,16 @@ let process_sub sub : subroutine_cfa_data =
  output
-let of_prog prog : subroutine_cfa_map =
+let of_prog prog next_instr_graph : subroutine_cfa_map =
  (** Extracts the `cfa_changes` of a program *)
  let fold_step accu sub =
    (try
-       let subroutine_data = process_sub sub in
+       (match is_ghost_sub sub with
-       StrMap.add (BStd.Sub.name sub) subroutine_data accu
+        | true -> accu
        | false ->
          let subroutine_data = process_sub sub next_instr_graph in
          StrMap.add (BStd.Sub.name sub) subroutine_data accu
       )
     with
     | InvalidSub -> accu
     | Inconsistent tid ->
@ -606,10 +774,34 @@ let of_prog prog : subroutine_cfa_map =
    ~init:StrMap.empty
    ~f:fold_step
-let of_proj proj : subroutine_cfa_map =
let build_sub_ranges prog: (memory_address) AddrMap.t =
  (** Associates the first address of every non-ghost subroutine of [prog]
      with the last address of that subroutine. Since the map is ordered by
      start address, it doubles as a list of address ranges in which the range
      holding a given address can be looked up efficiently. *)
  let register_range ranges sub =
    if is_ghost_sub sub then
      (* Ghost subroutines have no content, hence no range to register *)
      ranges
    else begin
      let range_start = int64_addr_of sub in
      let range_end = find_last_addr sub in
      AddrMap.add range_start range_end ranges
    end
  in
  BStd.Term.enum BStd.sub_t prog
  |> BStd.Seq.fold ~init:AddrMap.empty ~f:register_range
 let of_proj no_rbp_undef proj : subroutine_cfa_map =
  (** Extracts the `cfa_changes` of a project *)
  __settings.no_rbp_undef <- no_rbp_undef ;
  let prog = BStd.Project.program proj in
-  of_prog prog
+  let sub_ranges = build_sub_ranges prog in
  let next_instr_graph =
    build_next_instr sub_ranges (BStd.Project.disasm proj) in
  of_prog prog next_instr_graph
 let clean_lost_track_subs pre_dwarf : subroutine_cfa_map =
  (** Removes the subroutines on which we lost track from [pre_dwarf] *)
--- a/csmith/.gitignore
+++ b/csmith/.gitignore
@ -0,0 +1 @@
 0[0-9]/
--- a/csmith/check_generated_eh_frame.py
+++ b/csmith/check_generated_eh_frame.py
@ -32,10 +32,105 @@ def parse_fde_head(line):
    return pc_beg, pc_end
-def parse_fde_row(line, ra_col):
def detect_clang_flat_to_pyramid(rows):
    """ Artificially repair clang flat callee-saved saving to a gcc pyramid-like shape.
    Eg., clang will generate
       LOC           CFA      rbx   rbp   ra
    0000000000007180 rsp+8    u     u     c-8
    0000000000007181 rsp+16   u     u     c-8
    0000000000007182 rsp+24   u     u     c-8
    0000000000007189 rsp+7632 c-24  c-16  c-8
    while we would wish to have
       LOC           CFA      rbx   rbp   ra
    0000000000007180 rsp+8    u     u     c-8
    0000000000007181 rsp+16   u     c-16  c-8
    0000000000007182 rsp+24   c-24  c-16  c-8
    0000000000007189 rsp+7632 c-24  c-16  c-8
    The repair strategy is as follows:
    - ignore the implicit first row
    - find the first k lines such that only CFA changes from line to line, with a delta
      of +8, with an address delta of at most 2 (push is at most 2 bytes long)
    - for every callee-saved R that concerns us and that is defined at line k+1 with
      offset c-x, while rsp+x is the CFA of line k' <= k, redefine R as c-x in lines
      [k'; k]

    `rows` is a list of dicts with at least the keys "LOC" (int), "CFA" (str) and
    "ra", plus one key per register column. The rows are modified in place; the
    same list is returned, whether or not a repair was applied.
    """
    # Columns that never count as a callee-saved register definition
    ignored_cols = ("LOC", "CFA", "ra")

    def try_starting_at(start_row):
        """ Try a repair assuming the flat run starts at `start_row`.
        Returns (rows, True) if rows were rewritten, (rows, False) otherwise. """
        if len(rows) < start_row + 1:  # Ensure we have at least the start row
            return rows, False
        flatness_row_id = start_row
        # The CFA parsed below is that of the start row, so this is the row that
        # must be rsp-based (not unconditionally row 1).
        if rows[start_row]["CFA"][:4] != "rsp+":
            return rows, False
        first_cfa = int(rows[start_row]["CFA"][4:])
        prev_cfa = first_cfa
        prev_loc = rows[start_row]["LOC"]
        for row in rows[start_row + 1 :]:
            # A row defining any callee-saved register ends the flat run: in
            # the flat run, only the CFA is allowed to change.
            if any(reg not in ignored_cols and row[reg] != "u" for reg in row):
                break
            cfa = row["CFA"]
            if cfa[:4] != "rsp+":
                break
            cfa_offset = int(cfa[4:])
            if cfa_offset != prev_cfa + 8:
                break
            prev_cfa += 8
            loc = row["LOC"]
            if loc > prev_loc + 2:
                break
            prev_loc = loc
            flatness_row_id += 1
        # Row right after the flat run: the one expected to define the registers
        flatness_row_id += 1
        if flatness_row_id - start_row <= 1 or flatness_row_id >= len(rows):
            return rows, False  # nothing to change
        flatness_row = rows[flatness_row_id]
        # First collect every change, so that rows are left untouched if the
        # table turns out not to be a flat one.
        reg_changes = {}
        for reg in flatness_row:
            if reg in ignored_cols:
                continue
            rule = flatness_row[reg]
            if rule[:2] != "c-":
                return rows, False  # Not a flat_to_pyramid after all
            rule_offset = int(rule[2:])
            rule_offset_rectified = rule_offset - first_cfa
            if rule_offset_rectified % 8 != 0:
                return rows, False
            row_change_id = rule_offset_rectified // 8 + start_row
            if row_change_id < start_row:
                # The save slot lies before the flat run: not our pattern, and
                # a negative index would silently slice from the end.
                return rows, False
            reg_changes[reg] = (row_change_id, rule)
        for reg in reg_changes:
            change_from, rule = reg_changes[reg]
            for row in rows[change_from:flatness_row_id]:
                row[reg] = rule
        return rows, True

    for start_row in [1, 2]:
        mod_rows, modified = try_starting_at(start_row)
        if modified:
            return mod_rows
    return rows
 def parse_fde_row(line, reg_cols):
    vals = list(map(lambda x: x.strip(), line.split()))
-    assert len(vals) > ra_col  # ra is the rightmost useful column
+    assert len(vals) > reg_cols["ra"]  # ra is the rightmost useful column
-    out = {"LOC": int(vals[0], 16), "CFA": vals[1], "ra": vals[ra_col]}
+
    out = {"LOC": int(vals[0], 16), "CFA": vals[1]}
    for reg in reg_cols:
        col_id = reg_cols[reg]
        out[reg] = vals[col_id]
    if "rbp" not in out:
        out["rbp"] = "u"
    return out
@ -47,7 +142,14 @@ def clean_rows(rows):
    out_rows = [rows[0]]
    for row in rows[1:]:
        if not row == out_rows[-1]:
-            out_rows.append(row)
+            filtered_row = row
            filter_out = []
            for reg in filtered_row:
                if reg not in ["LOC", "CFA", "rbp", "ra"]:
                    filter_out.append(reg)
            for reg in filter_out:
                filtered_row.pop(reg)
            out_rows.append(filtered_row)
    return out_rows
@ -58,14 +160,28 @@ def parse_fde(lines):
    except NotFDE:
        return
-    rows = [{"LOC": 0, "CFA": "rsp+8", "ra": "c-8"}]  # Implicit CIE row
+    rows = [{"LOC": 0, "CFA": "rsp+8", "rbp": "u", "ra": "c-8"}]  # Implicit CIE row
    if len(lines) >= 2:  # Has content
        head_row = list(map(lambda x: x.strip(), lines[1].split()))
-        ra_col = head_row.index("ra")
+        reg_cols = {}
        for pos, reg in enumerate(head_row):
            if reg not in ["LOC", "CFA"]:
                reg_cols[reg] = pos
        for line in lines[2:]:
-            rows.append(parse_fde_row(line, ra_col))
+            rows.append(parse_fde_row(line, reg_cols))
    # if pc_beg == 0x1160:
    #     print("===== FDE: {}..{} ====".format(hex(pc_beg), hex(pc_end)))
    #     print("BEFORE:")
    #     for row in rows:
    #         print(row)
    rows = detect_clang_flat_to_pyramid(rows)
    # if pc_beg == 0x1160:
    #     print("AFTER:")
    #     for row in rows:
    #         print(row)
    return {"beg": pc_beg, "end": pc_end, "rows": clean_rows(rows)}
@ -151,7 +267,7 @@ def dump_light_fdes(fdes):
 def match_fde(orig, synth):
    def vals_of(row):
-        return {"CFA": row["CFA"], "ra": row["ra"]}
+        return {"CFA": row["CFA"], "ra": row["ra"], "rbp": row["rbp"]}
    def loc_of(rch):
        return rch[1]["LOC"]
@ -165,7 +281,7 @@ def match_fde(orig, synth):
            rowchanges.append((typ, row))
    rowchanges.sort(key=loc_of)
-    matching = True
+    mismatch_count = 0
    for rowid, rowch in enumerate(rowchanges):
        typ, row = rowch[0], rowch[1]
        cur_val[typ] = vals_of(row)
@ -180,12 +296,17 @@ def match_fde(orig, synth):
                        hex(row["LOC"]), cur_val[0], cur_val[1]
                    )
                )
-            matching = False
+            mismatch_count += 1
-    return matching
+    return mismatch_count
 def parse_sym_table(handle):
    def readint(x):
        if x.startswith("0x"):
            return int(x[2:], 16)
        return int(x)
    out_map = {}
    for line in handle:
        line = line.strip()
@ -194,7 +315,7 @@ def parse_sym_table(handle):
        spl = list(map(lambda x: x.strip(), line.split()))
        loc = int(spl[1], 16)
-        size = int(spl[2])
+        size = readint(spl[2])
        name = spl[7]
        out_map[name] = (loc, size)
    return out_map
@ -227,15 +348,30 @@ def main():
    mismatches = 0
    for (orig, synth) in matched:
-        if not match_fde(orig, synth):
+        mismatches += match_fde(orig, synth)
            mismatches += 1
    reports = []
    if mismatches > 0:
        reports.append("{} mismatches".format(mismatches))
    if unmatched_orig:
-        reports.append("{} unmatched (orig)".format(len(unmatched_orig)))
+        worth_reporting = False
        for unmatched in unmatched_orig:
            if len(unmatched["rows"]) > 1:
                worth_reporting = True
                break
        if worth_reporting:
            unmatched_addrs = [fde_pos(fde) for fde in unmatched_orig]
            reports.append(
                "{} unmatched (orig): {}".format(
                    len(unmatched_orig), ", ".join(unmatched_addrs)
                )
            )
    if unmatched_synth:
-        reports.append("{} unmatched (synth)".format(len(unmatched_synth)))
+        unmatched_addrs = [fde_pos(fde) for fde in unmatched_synth]
        reports.append(
            "{} unmatched (synth): {}".format(
                len(unmatched_synth), ", ".join(unmatched_addrs)
            )
        )
    if reports:
        print("{}: {}".format(test_name, "; ".join(reports)))
--- a/csmith/csmith_gen.sh
+++ b/csmith/csmith_gen.sh
@ -1,6 +1,12 @@
 #!/bin/bash
-USAGE="$0 output_dir nb_tests"
+USAGE="$0 output_dir nb_tests
 You may also set COMPILE_CMD to eg. 'gcc -O2' if you want to override the
 default command."
 if [ -z "$COMPILE_CMD" ] ; then
    COMPILE_CMD='gcc -O2'
 fi
 if [ "$#" -lt 2 ] ; then
    >&2 echo -e "Missing argument(s). Usage:\n$USAGE"
@ -28,11 +34,11 @@ for _num in $(seq 1 $NB_TESTS); do
    csmith > "$path.c"
    sed -i 's/^static \(.* func_\)/\1/g' "$path.c"
    echo -ne "\r>>> $num.bin          "
-    gcc -O2 -I/usr/include/csmith-2.3.0/ -w "$path.c" -o "$path.orig.bin"
+    $COMPILE_CMD -I/usr/include/csmith-2.3.0/ -w "$path.c" -o "$path.orig.bin"
    objcopy --remove-section '.eh_frame' --remove-section '.eh_frame_hdr' \
        "$path.orig.bin" "$path.bin"
    echo -ne "\r>>> $num.eh.bin          "
-    ../synthesize_dwarf.sh "$path.bin" "$path.eh.bin"
+    BAP_ARGS='--dwarfsynth-no-rbp-undef' ../synthesize_dwarf.sh "$path.bin" "$path.eh.bin"
    if [ "$check_gen_eh_frame" -gt 0 ] ; then
        ./check_generated_eh_frame.sh "$path"
--- a/curiosities/.gitignore
+++ b/curiosities/.gitignore
@ -0,0 +1 @@
 *.bin
--- a/curiosities/gccO2_join_fail/24.c
+++ b/curiosities/gccO2_join_fail/24.c
--- a/curiosities/mov_rsp_rbp_falsepos/72.c
+++ b/curiosities/mov_rsp_rbp_falsepos/72.c
--- a/dwarfsynth.ml
+++ b/dwarfsynth.ml
@ -20,8 +20,18 @@ module Cmdline = struct
        ~default:"tmp.marshal"
    )
  (* Boolean command-line flag "no-rbp-undef", off by default. *)
  let no_rbp_undef = Cnf.(
      param (bool) "no-rbp-undef"
        ~doc:("Do not unset %rbp after it has been set once in a FDE. "
              ^"This mimics gcc eh_frame for ease of validation.")
        ~as_flag:true
        ~default:false
    )
  let () = Cnf.(
      when_ready ((fun {get=(!!)} ->
-          Bap.Std.Project.register_pass' (main !!outfile)))
+          Bap.Std.Project.register_pass' (main
                                            ~no_rbp_undef:!!no_rbp_undef
                                            !!outfile )))
    )
 end
--- a/synthesize_dwarf.sh
+++ b/synthesize_dwarf.sh
@ -35,7 +35,9 @@ function find_ml_dwarf_write {
 }
 function bap_synth {
-    bap "$INPUT_FILE" --no-byteweight -p dwarfsynth --dwarfsynth-output "$TMP_DIR/marshal" \
+    bap "$INPUT_FILE" \
        --no-byteweight -p dwarfsynth \
        --dwarfsynth-output "$TMP_DIR/marshal" $BAP_ARGS \
        > /dev/null
    return $?
 }
--- a/test/.gitignore
+++ b/test/.gitignore
@ -0,0 +1 @@
 *.bin
--- a/test/inline_asm.c
+++ b/test/inline_asm.c
@ -0,0 +1,19 @@
 #include <stdio.h>
 #include <stdlib.h>
 /* Runs a counting loop whose body moves the stack pointer by hand through
  * inline assembly. The loop upper bound is read from argv[1]; the number of
  * iterations performed is printed on stdout. Exits with status 1 when the
  * argument is missing.
  * NOTE(review): presumably a fixture for tracking %rsp changes made by inline
  * asm -- confirm. */
 int main(int argc, char** argv) {
    if(argc < 2) {
        fprintf(stderr, "Missing argument: loop upper bound.\n");
        exit(1);
    }
    int upper_bound = atoi(argv[1]);
    int count = 0;
    for(int i=0; i < upper_bound; ++i) {
        /* Lower %rsp by 8 and store 42 at the new top of the stack */
        __asm__("sub $8, %rsp; movq $42, (%rsp)");
        count++;
        /* Undo the adjustment: %rsp is back to its value at loop entry */
        __asm__("add $8, %rsp");
    }
    printf("%d\n", count);
    return 0;
 }
--- a/test/without_rbp_col.c
+++ b/test/without_rbp_col.c
@ -0,0 +1,28 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
 /* For every cz in [1, z), declares a variable-length array of cz ints and
  * writes 8 to its middle element. Nothing is returned.
  * NOTE(review): the VLA is presumably there to force frame-pointer based
  * addressing -- confirm. */
 void rbp_bump_2(int z) {
    for(int cz=1; cz < z; cz++) {
        int x[cz];
        x[cz / 2] = 8;
    }
 }
 /* For every cy in [1, y), declares a variable-length array of cy ints, writes
  * 8 to its middle element and passes that element to rbp_bump_2, so that the
  * callee is entered while this function's own array is live. */
 void rbp_bump_1(int y) {
    for(int cy=1; cy < y; cy++) {
        int x[cy];
        x[cy / 2] = 8;
        rbp_bump_2(x[cy/2]);
    }
 }
 /* Reads n from argv[1] and calls rbp_bump_1(n). Returns 1 when the argument
  * is missing, 0 otherwise. */
 int main(int argc, char** argv) {
    if(argc < 2) {
        fprintf(stderr, "Missing argument: n\n");
        return 1;
    }
    int num = atoi(argv[1]);
    rbp_bump_1(num);
    return 0;
 }
Author	SHA1	Message	Date
Théophile Bastian	6df057260d	Add uncommitted tests and curiosities	2019-04-05 16:39:21 +02:00
Théophile Bastian	11fa3fab40	Check_gen: patch for flat vs pyramid clang tables clang generates tables like this LOC CFA rbx rbp r14 r15 ra 00000000000028a0 rsp+8 u u u u c-8 00000000000028a1 rsp+16 u u u u c-8 00000000000028a3 rsp+24 u u u u c-8 00000000000028a5 rsp+32 u u u u c-8 00000000000028a6 rsp+40 u u u u c-8 00000000000028aa rsp+64 c-40 c-16 c-32 c-24 c-8	2019-04-05 16:32:00 +02:00
Théophile Bastian	344ac84ef3	Ignore ghost subroutines for clang A ghost subroutine is a subroutine having, in the IR representation, no content. At clang -O0, some might be generated, eg. <foo_func>: foo_addr nop which translates to Empty in BIL.	2019-04-05 11:23:18 +02:00
Théophile Bastian	6c18d9f537	Use rbp only on the subs where we need it	2019-04-05 11:23:13 +02:00
Théophile Bastian	29ab916c55	Fix next_instr_graph out-of-subroutine pointers The disasm-based next_instr_graph would introduce next instructions out of the current subroutine for eg. calls, jmp to plts, etc.	2019-04-04 19:47:36 +02:00
Théophile Bastian	5f7dfb6f5f	Use disasm to generate next_instr_graph	2019-04-04 13:49:39 +02:00
Théophile Bastian	3d336de196	Add flag to never go back to undefined rbp Once rbp has been set in the DWARF, if this flag is set, nothing will remove it from the table. This mimicks gcc and allows us to check easily our tables against theirs.	2019-04-04 11:52:47 +02:00
Théophile Bastian	4313ee91a7	Curiosities: `mov %rsp, %rbp` not switching to rbp tracking	2019-04-04 11:51:57 +02:00
Théophile Bastian	c3050da113	Csmith checker: check rbp column	2019-04-03 18:20:03 +02:00
Théophile Bastian	5d6929f84c	csmith_gen: add COMPILE_CMD option	2019-04-03 12:22:07 +02:00