Add uncommitted tests and curiosities

Check_gen: patch for flat vs pyramid clang tables

clang generates tables like this:

   LOC           CFA      rbx   rbp   r14   r15   ra
00000000000028a0 rsp+8    u     u     u     u     c-8
00000000000028a1 rsp+16   u     u     u     u     c-8
00000000000028a3 rsp+24   u     u     u     u     c-8
00000000000028a5 rsp+32   u     u     u     u     c-8
00000000000028a6 rsp+40   u     u     u     u     c-8
00000000000028aa rsp+64   c-40  c-16  c-32  c-24  c-8
2019-04-05 16:39:21 +02:00 · 2019-04-05 16:32:00 +02:00 · 2019-04-05 11:23:18 +02:00 · 2019-04-05 11:23:13 +02:00 · 2019-04-04 19:47:36 +02:00 · 2019-04-04 13:49:39 +02:00
13 changed files with 5754 additions and 125 deletions
--- a/DwarfSynth/Main.ml
+++ b/DwarfSynth/Main.ml
@ -1,8 +1,8 @@
 open Std

-let main outfile proj =
+let main ?no_rbp_undef:(no_rbp_undef=false) outfile proj =
  let pre_dwarf = proj
-                  |> Simplest.of_proj
+                  |> Simplest.of_proj no_rbp_undef
                  |> Simplest.clean_lost_track_subs in
  Format.printf "%a" Frontend.pp_pre_dwarf_readelf pre_dwarf;
  let pre_c_dwarf = PreCBinding.convert_pre_c pre_dwarf in
--- a/DwarfSynth/Simplest.ml
+++ b/DwarfSynth/Simplest.ml
@ -27,7 +27,8 @@ type subroutine_cfa_data = {
 }

 type block_local_state = {
-  rbp_vars: BStd.Var.Set.t
+  rbp_vars: BStd.Var.Set.t;
+  rbp_pop_set: BStd.Tid.Set.t
 }

 module StrMap = Map.Make(String)
@ -38,11 +39,26 @@ module TIdMap = Map.Make(BStd.Tid)
 exception InvalidSub
 exception UnexpectedRbpSet

+(* Synthesis options shared by the whole module through `__settings`. *)
+type synthesis_settings = {
+    (* When true, `find_rbp_pop_set` returns an empty set *)
+    mutable no_rbp_undef: bool
+  }
+
+(* The single settings instance. `of_proj` overwrites `no_rbp_undef` with its
+   argument before running the analysis; the initial value is false. *)
+let __settings = {
+  no_rbp_undef = false
+}
+
 let pp_cfa_pos ppx = function
  | RspOffset off -> Format.fprintf ppx "RSP + (%s)" (Int64.to_string off)
  | RbpOffset off -> Format.fprintf ppx "RBP + (%s)" (Int64.to_string off)
  | CfaLostTrack -> Format.fprintf ppx "??@."

+(* Pretty-printer for an rbp position: prints "u" for [RbpUndef], and "c"
+   followed by the explicitly signed offset (eg. "c-16") for [RbpCfaOffset]. *)
+let pp_rbp_pos ppx = function
+  | RbpUndef -> Format.fprintf ppx "u"
+  | RbpCfaOffset off -> Format.fprintf ppx "c%+Ld" off
+
+(* Pretty-printer for a (CFA position, rbp position) pair, printed as
+   "(<cfa>; <rbp>)" using `pp_cfa_pos` and `pp_rbp_pos`. *)
+let pp_reg_pos ppx (cfa_pos, rbp_pos) =
+  Format.fprintf ppx "(%a; %a)" pp_cfa_pos cfa_pos pp_rbp_pos rbp_pos
+
 let pp_int64_hex ppx number =
  let mask_short = Int64.(pred (shift_left one 16)) in
  let pp_short number =
@ -82,6 +98,15 @@ let opt_addr_of_blk_elt = function
  | `Jmp jmp -> opt_addr_of jmp
  | `Phi phi -> opt_addr_of phi

+let is_ghost_sub sub =
+  (** Check whether the subroutine is a "ghost", ie. has NO content: returns
+      true iff every block of [sub] has an empty sequence of elements. *)
+  let is_ghost_block blk =
+    (* A block is a ghost when it holds no element at all *)
+    BStd.Blk.elts blk
+    |> BStd.Seq.is_empty
+  in
+  let blk_seq = BStd.Term.enum BStd.blk_t sub in
+  BStd.Seq.for_all blk_seq ~f:is_ghost_block
+
 let entrypoint_address blk =
  (** Find the first instruction address in the current block.
      Return None if no instruction has address.  *)
@ -104,79 +129,195 @@ let map_option f = function
  | None -> None
  | Some x -> Some (f x)

-let build_next_instr graph =
+exception Block_not_in_subroutine
+
+let build_next_instr sub_ranges (disasm: BStd.disasm): AddrSet.t AddrMap.t =
  (** Build a map of memory_address -> AddrSet.t holding, for each address, the
      set of instructions coming right after the instruction at given address.
      There might be multiple such addresses, if the current instruction is at
      a point of branching.  *)

-  let addresses_in_block blk =
-    (** Set of addresses present in the block *)
-    BStd.Seq.fold (BStd.Blk.elts blk)
-      ~init:AddrSet.empty
-      ~f:(fun accu elt ->
-          let addr = opt_addr_of_blk_elt elt in
-          match addr with
-          | None -> accu
-          | Some x ->
-            (try
-               AddrSet.add (BStd.Word.to_int64_exn x) accu
-             with _ -> accu)
-        )
+  let rec build_of_instr_list cur_map = function
+    (** Maps an instruction to its following instruction in this block *)
+    | (cur_mem, cur_insn) :: ((next_mem, next_insn) as elt2) :: tl ->
+      (* Its only successor is next_insn *)
+      let new_map =
+        (try
+           let cur_addr = to_int64_addr @@ BStd.Memory.min_addr cur_mem
+           and next_addr = to_int64_addr @@ BStd.Memory.min_addr next_mem in
+           AddrMap.add cur_addr (AddrSet.singleton next_addr) cur_map
+         with _ -> cur_map)
+        in
+        build_of_instr_list new_map (elt2 :: tl)
+    | (cur_mem, cur_insn) :: [] ->
+      let last_addr =
+        (try Some (to_int64_addr @@ BStd.Memory.min_addr cur_mem)
+         with _ -> None) in
+      cur_map, last_addr
+
+      (* Ignore the last one: its successors are held in the graph *)
+    | [] ->
+      cur_map, None
  in

-  let node_successors_addr (nd: CFG.node) : AddrSet.t =
-    let rec do_find_succ accu nd =
-      let fold_one accu c_node =
-        match entrypoint_address (CFG.Node.label c_node) with
-        | Some addr ->
-          (try
-             AddrSet.add (BStd.Word.to_int64_exn addr) accu
-           with _ -> accu)
-        | None -> do_find_succ accu c_node
-      in
+  let cfg = BStd.Disasm.cfg disasm in

-      let succ = CFG.Node.succs nd graph in
-      BStd.Seq.fold succ
-        ~init:accu
-        ~f:fold_one
-    in
-    do_find_succ AddrSet.empty nd
+  let rec block_addresses block =
+    (try BStd.Block.addr block
+         |> to_int64_addr
+         |> AddrSet.singleton
+     with _ ->
+       (* Probably an intermediary node, eg. JMP --> [intermed node] --> BLK *)
+       let outputs = BStd.Graphs.Cfg.Node.outputs block cfg
+                     |> BStd.Seq.map ~f:BStd.Graphs.Cfg.Edge.dst in
+       BStd.Seq.fold outputs
+         ~init:AddrSet.empty
+         ~f:(fun accu block -> AddrSet.union (block_addresses block) accu)
+    )
  in

-  let build_of_block accu_map node =
-    let blk = CFG.Node.label node in
-    let node_successors = node_successors_addr node in
-    let instr_addresses = AddrSet.elements @@ addresses_in_block blk in
+  let build_of_block cur_map block =
+    (try
+       (* First, check that this block belongs to a subroutine *)
+       let block_first_address = (
+         try
+           to_int64_addr @@ BStd.Block.addr block
+         with _ -> raise Block_not_in_subroutine) in
+       let sub_first_addr, sub_last_addr = (
+         try AddrMap.find_last
+               (fun start_addr -> start_addr <= block_first_address) sub_ranges
+         with Not_found ->
+           raise Block_not_in_subroutine
+       ) in

-    let rec accumulate_mappings mappings addr_list = function
-      | None -> mappings
-      | Some (instr, instr_seq) as cur_instr ->
-        let instr_addr = opt_addr_of_blk_elt instr in
-        match (map_option to_int64_addr instr_addr), addr_list with
-        | None, _ ->
-          accumulate_mappings mappings addr_list @@ BStd.Seq.next instr_seq
-        | Some cur_addr, next_addr::t when cur_addr >= next_addr ->
-          accumulate_mappings mappings t cur_instr
-        | Some cur_addr, next_addr::_ ->
-          let n_mappings = AddrMap.add
-              cur_addr (AddrSet.singleton next_addr) mappings in
-          accumulate_mappings n_mappings addr_list @@ BStd.Seq.next instr_seq
-        | Some cur_addr, [] ->
-          let n_mappings = AddrMap.add
-              cur_addr node_successors mappings in
-          accumulate_mappings n_mappings addr_list @@ BStd.Seq.next instr_seq
-    in
-    accumulate_mappings
-      accu_map
-      instr_addresses
-      (BStd.Seq.next @@ BStd.Blk.elts blk)
+       (* Add the sequence of instructions inside the block itself *)
+       let cur_map, last_addr =
+         build_of_instr_list cur_map (BStd.Block.insns block) in
+
+       (* Then the set of possible destinations for the block terminator *)
+       (match last_addr with
+        | Some last_addr ->
+          let following_set = BStd.Graphs.Cfg.Node.outputs block cfg
+                              |> BStd.Seq.fold
+                                ~init:AddrSet.empty
+                                ~f:(fun set edge -> AddrSet.union
+                                       (block_addresses
+                                          (BStd.Graphs.Cfg.Edge.dst edge))
+                                       set)
+                              |> AddrSet.filter (fun addr ->
+                                  sub_first_addr <= addr
+                                  && addr <= sub_last_addr)
+                                (* ^ We must ensure the landing address belongs
+                                   to the current subroutine for our purpose *)
+          in
+          AddrMap.add last_addr following_set cur_map
+        | None -> cur_map
+       )
+     with Block_not_in_subroutine ->
+       cur_map
+    )
  in

-  BStd.Seq.fold (CFG.nodes graph)
+  BStd.Seq.fold (BStd.Graphs.Cfg.nodes cfg)
    ~init:AddrMap.empty
    ~f:build_of_block

+let find_rbp_pop_set cfg entry =
+  (** Returns a BStd.Tid.Set.t of the terms actually "popping" %rbp, that is,
+      the terms that should trigger a change to RbpUndef of the %rbp register.
+      The current heuristic is to consider the expressions
+       i) of the form `rbp = F(mem, rsp)` (allegedly, rbp = something from the
+        stack);
+      ii) that are the last of this kind in the subroutine's CFG (ie. such that
+        there is not another instruction matching (i) that is reachable through
+        the CFG from the current instruction).
+      [entry] is the CFG node the search starts from. When
+      `__settings.no_rbp_undef` is set, the result is always the empty set.
+  *)
+
+  let def_is_rbp_pop def =
+    (* True iff [def] assigns %rbp and its right-hand side is a "pop
+       expression" as decided by [is_pop_expr] *)
+    let is_pop_expr expr =
+      (* True iff the free variables of [expr] map to exactly two distinct
+         entries, the register entry being %rsp, and one of the free variables
+         is named "mem" *)
+      let free_vars = BStd.Exp.free_vars expr in
+      let free_x86_regs = Regs.X86_64.map_varset free_vars in
+      (match Regs.DwRegOptSet.cardinal free_x86_regs with
+       | 2 ->
+         (* Keep only the entries that are actual registers, and pick one *)
+         let reg = free_x86_regs
+                   |> Regs.DwRegOptSet.filter
+                     (fun x -> match x with None -> false | Some _ -> true)
+                   |> Regs.DwRegOptSet.choose in
+         let has_mem_var = BStd.Var.Set.exists
+             ~f:(fun x -> BStd.Var.name x = "mem")
+             free_vars in
+         (match reg, has_mem_var with
+          | Some dw_var, true when dw_var = Regs.X86_64.rsp -> true
+          | _ -> false)
+       | _ -> false
+      )
+    in
+
+    (match Regs.X86_64.of_var (BStd.Def.lhs def),
+           is_pop_expr @@ BStd.Def.rhs def with
+    | Some reg, true when reg = Regs.X86_64.rbp -> true
+    | _ -> false
+    )
+  in
+
+
+  let block_find_rbp_pop block =
+    (* Tid of the LAST definition of [block] that pops %rbp, if any *)
+    let fold_elt = function
+      | `Def(def) when (def_is_rbp_pop def) -> Some (BStd.Term.tid def)
+      | _ -> None
+    in
+
+    let elts_seq = BStd.Blk.elts block in
+    let last_pop = BStd.Seq.fold elts_seq
+        ~init:None
+        ~f:(fun accu elt ->
+            (match fold_elt elt with
+             | None -> accu
+             | Some tid -> Some tid))
+    in
+    last_pop
+  in
+
+  let rec block_dfs node visited =
+    (* DFS on the CFG to find rbp pops, and rule out those that are not final.
+       Returns (pops found at or below [node], whether any pop was reported at
+       or below [node], updated set of visited blocks).
+     *)
+    let block = CFG.Node.label node in
+    (match BStd.Blk.Set.mem visited block with
+    | true ->
+      (* Loop: we already visited this node *)
+      (* NOTE(review): [visited] is threaded through sibling subtrees and never
+         unwound, so this branch is also taken when a node is reached a second
+         time through a different path (eg. a join after an if/else), not only
+         on actual loops; it then reports has_pop = true -- confirm this is
+         intended. *)
+      BStd.Tid.Set.empty, true, visited
+    | false ->
+      let visited = BStd.Blk.Set.add visited block in
+      (* Explore the successors first, merging what they found *)
+      let pop_set, has_pop, visited =
+        BStd.Seq.fold (CFG.Node.succs node cfg)
+          ~f:(fun (pre_pop_set, pre_has_pop, visited) child ->
+              let cur_pop_set, cur_has_pop, visited =
+                block_dfs child visited in
+              (BStd.Tid.Set.union pre_pop_set cur_pop_set),
+              (pre_has_pop || cur_has_pop),
+              visited
+            )
+          ~init:(BStd.Tid.Set.empty, false, visited)
+      in
+      let pop_set, has_pop = (match has_pop with
+          | false -> (* No rbp pop below, we seek rbp pops in this block *)
+            (match block_find_rbp_pop block with
+                | None -> pop_set, false
+                | Some tid -> BStd.Tid.Set.add pop_set tid, true
+            )
+          | true -> pop_set, has_pop) in
+      pop_set, has_pop, visited
+    )
+  in
+
+  if __settings.no_rbp_undef then
+    BStd.Tid.Set.empty
+  else (
+    let pop_set, _, _ =
+      block_dfs entry (BStd.Blk.Set.empty) in
+    pop_set
+  )
+
 let interpret_var_expr c_var offset expr = BStd.Bil.(
    let closed_form = BStd.Exp.substitute
      (var c_var)
@ -212,13 +353,14 @@ let is_single_free_reg expr =
  )

 let process_def (local_state: block_local_state) def (cur_reg: reg_pos)
+    allow_rbp
  : (reg_pos option * block_local_state) =
  let lose_track = Some CfaLostTrack in

  let cur_cfa, cur_rbp = cur_reg in
  let out_cfa =
-    (match cur_cfa, Regs.X86_64.of_var (BStd.Def.lhs def) with
-     | RspOffset(cur_offset), Some reg when reg = Regs.X86_64.rsp ->
+    (match cur_cfa, Regs.X86_64.of_var (BStd.Def.lhs def), allow_rbp with
+     | RspOffset(cur_offset), Some reg, _ when reg = Regs.X86_64.rsp ->
       let exp = BStd.Def.rhs def in
       (match is_single_free_reg exp with
        | Some (bil_var, dw_var) when dw_var = Regs.X86_64.rsp ->
@ -230,7 +372,7 @@ let process_def (local_state: block_local_state) def (cur_reg: reg_pos)
          )
        | _ -> lose_track
       )
-     | RspOffset(cur_offset), Some reg when reg = Regs.X86_64.rbp ->
+     | RspOffset(cur_offset), Some reg, true when reg = Regs.X86_64.rbp ->
       (* We have CFA=rsp+k and a line %rbp <- [expr].
          Might be a %rbp <- %rsp *)
       let exp = BStd.Def.rhs def in
@ -251,7 +393,7 @@ let process_def (local_state: block_local_state) def (cur_reg: reg_pos)
          )
        | _ -> None
       )
-     | RbpOffset(cur_offset), Some reg when reg = Regs.X86_64.rbp ->
+     | RbpOffset(cur_offset), Some reg, true when reg = Regs.X86_64.rbp ->
       (* Assume we are overwriting %rbp with something — we must revert to
          some rsp-based indexing *)
       (* FIXME don't assume the rsp offset will always be 8, find a smart way
@ -260,6 +402,7 @@ let process_def (local_state: block_local_state) def (cur_reg: reg_pos)
          value is read from the stack.
       *)
       Some (RspOffset(Int64.of_int 16))
+     | RbpOffset _, _, false -> assert false
     | _ -> None
    ) in

@ -288,25 +431,6 @@ let process_def (local_state: block_local_state) def (cur_reg: reg_pos)
     (has_rbp_var || has_intermed_rbp_var))
  in

-  let is_pop_expr expr =
-    let free_vars = BStd.Exp.free_vars expr in
-    let free_x86_regs = Regs.X86_64.map_varset free_vars in
-    (match Regs.DwRegOptSet.cardinal free_x86_regs with
-     | 2 ->
-       let reg = free_x86_regs
-                 |> Regs.DwRegOptSet.filter
-                   (fun x -> match x with None -> false | Some _ -> true)
-                 |> Regs.DwRegOptSet.choose in
-       let has_mem_var = BStd.Var.Set.exists
-         ~f:(fun x -> BStd.Var.name x = "mem")
-         free_vars in
-       (match reg, has_mem_var with
-        | Some dw_var, true when dw_var = Regs.X86_64.rsp -> true
-        | _ -> false)
-     | _ -> false
-    )
-  in
-
  let is_rbp_expr expr =
    let free_vars = BStd.Exp.free_vars expr in
    let free_x86_regs = Regs.X86_64.map_varset free_vars in
@ -369,13 +493,14 @@ let process_def (local_state: block_local_state) def (cur_reg: reg_pos)

       new_rbp, cur_state
     | RbpCfaOffset offs ->
-       (* We go back to RbpUndef when encountering something like a `pop rbp`,
-          that is, RBP <- f(RSP, mem) *)
-       (match Regs.X86_64.of_var (BStd.Def.lhs def),
-              is_pop_expr @@ BStd.Def.rhs def with
-       | Some reg, true when reg = Regs.X86_64.rbp ->
+       (* We go back to RbpUndef if the current def is in the rbp_pop_set --
+          see `find_rbp_pop_set` *)
+
+       (match BStd.Tid.Set.mem (local_state.rbp_pop_set) @@ BStd.Term.tid def
+            with
+       | true ->
         Some RbpUndef, local_state
-       | _ -> None, local_state
+       | false -> None, local_state
       )
    )
  in
@ -404,7 +529,8 @@ let process_jmp jmp (cur_reg: reg_pos)
  | _ -> None

 let process_blk
-    next_instr_graph (block_init: reg_pos) blk : (reg_changes_fde * reg_pos) =
+    next_instr_graph rbp_pop_set allow_rbp (block_init: reg_pos) blk
+  : (reg_changes_fde * reg_pos) =
  (** Extracts the registers (CFA+RBP) changes of a block. *)

  let apply_offset cur_addr_opt ((accu:reg_changes_fde), cur_reg, local_state)
@ -424,7 +550,8 @@ let process_blk

  let fold_elt (accu, cur_reg, cur_local_state) elt = match elt with
    | `Def(def) ->
-      let new_offset, new_state = process_def cur_local_state def cur_reg in
+      let new_offset, new_state =
+        process_def cur_local_state def cur_reg allow_rbp in
      apply_offset
        (opt_addr_of def) (accu, cur_reg, new_state) new_offset
    | `Jmp(jmp) ->
@ -442,7 +569,8 @@ let process_blk
    ) in

  let empty_local_state = {
-    rbp_vars = BStd.Var.Set.empty
+    rbp_vars = BStd.Var.Set.empty;
+    rbp_pop_set = rbp_pop_set
  } in
  let elts_seq = BStd.Blk.elts blk in
  let out_reg, end_reg, _ = BStd.Seq.fold elts_seq
@ -525,18 +653,21 @@ let cleanup_fde (fde_changes: reg_changes_fde) : reg_changes_fde =
  match AddrMap.fold fold_one fde_changes (AddrMap.empty, None, false) with
  | out, _, _ -> out

-let process_sub sub : subroutine_cfa_data =
+let process_sub sub next_instr_graph : subroutine_cfa_data =
  (** Extracts the `cfa_changes_fde` of a subroutine *)

  let cfg = BStd.Sub.to_cfg sub in
-  let next_instr_graph = build_next_instr cfg in

  let first_addr = int64_addr_of sub in
  let last_addr = find_last_addr sub in

  let initial_cfa_rsp_offset = Int64.of_int 8 in

+  let entry_blk = get_entry_blk cfg in
+  let rbp_pop_set = find_rbp_pop_set cfg entry_blk in
+
  let rec dfs_process
+      allow_rbp
      (sub_changes: (reg_changes_fde * reg_pos) TIdMap.t)
      node
      (entry_offset: reg_pos) =
@ -549,23 +680,56 @@ let process_sub sub : subroutine_cfa_data =
    | None ->
      (* Not yet visited: compute the changes *)
      let cur_blk_changes, end_reg =
-        process_blk next_instr_graph entry_offset cur_blk in
+        process_blk next_instr_graph rbp_pop_set
+          allow_rbp entry_offset cur_blk in
      let n_sub_changes =
        TIdMap.add tid (cur_blk_changes, entry_offset) sub_changes in
      BStd.Seq.fold (CFG.Node.succs node cfg)
-        ~f:(fun accu child -> dfs_process accu child end_reg)
+        ~f:(fun accu child -> dfs_process allow_rbp accu child end_reg)
        ~init:n_sub_changes
    | Some (_, former_entry_offset) ->
      (* Already visited: check that entry values are matching *)
-      if entry_offset <> former_entry_offset then
+      if entry_offset <> former_entry_offset then (
+        if allow_rbp then
+          Format.eprintf "Found inconsistency (0x%Lx): %a -- %a@."
+            (int64_addr_of cur_blk)
+            pp_reg_pos entry_offset pp_reg_pos former_entry_offset ;
        raise (Inconsistent tid)
+      )
      else
        sub_changes
  in

-  let entry_blk = get_entry_blk cfg in
+  let with_rbp_if_needed initial_offset =
+    (* Tries first without allowing CFA=rbp+k, then allowing it if the first
+       result was either inconsistent or lost track *)
+    let not_losing_track synth_result =
+      let lost_track = TIdMap.exists
+          (fun _ (_, (cfa_pos, _)) -> match cfa_pos with
+             | CfaLostTrack -> true
+             | _ -> false) synth_result
+      in
+      (match lost_track with
+       | true -> None
+       | false -> Some synth_result)
+    in
+    let without_rbp =
+      (try
+         dfs_process false TIdMap.empty entry_blk initial_offset
+         |> not_losing_track
+       with Inconsistent _ -> None
+      )
+    in
+    (match without_rbp with
+     | Some correct_res -> correct_res
+     | None ->
+       dfs_process true TIdMap.empty entry_blk initial_offset)
+  in
+
+
  let initial_offset = (RspOffset initial_cfa_rsp_offset, RbpUndef) in
-  let changes_map = dfs_process TIdMap.empty entry_blk initial_offset in
+  (* Try first without rbp, then with rbp upon failure *)
+  let changes_map = with_rbp_if_needed initial_offset in

  let merged_changes = TIdMap.fold
      (fun _ (cfa_changes, _) accu -> AddrMap.union (fun _ v1 v2 ->
@ -587,12 +751,16 @@ let process_sub sub : subroutine_cfa_data =

  output

-let of_prog prog : subroutine_cfa_map =
+let of_prog prog next_instr_graph : subroutine_cfa_map =
  (** Extracts the `cfa_changes` of a program *)
  let fold_step accu sub =
    (try
-       let subroutine_data = process_sub sub in
-       StrMap.add (BStd.Sub.name sub) subroutine_data accu
+       (match is_ghost_sub sub with
+        | true -> accu
+        | false ->
+          let subroutine_data = process_sub sub next_instr_graph in
+          StrMap.add (BStd.Sub.name sub) subroutine_data accu
+       )
     with
     | InvalidSub -> accu
     | Inconsistent tid ->
@ -606,10 +774,34 @@ let of_prog prog : subroutine_cfa_map =
    ~init:StrMap.empty
    ~f:fold_step

-let of_proj proj : subroutine_cfa_map =
+let build_sub_ranges prog: (memory_address) AddrMap.t =
+  (** Builds a map mapping the first address of each subroutine to its last
+      address. Ghost subroutines (see `is_ghost_sub`) are left out. This map
+      can be interpreted as a list of address ranges with easy fast access to
+      the range starting at or below a given address (cf Map.S.find_last, as
+      used by `build_next_instr`). *)
+
+  let fold_subroutine accu sub =
+    (match is_ghost_sub sub with
+     | true -> accu
+     | false ->
+       let first_addr = int64_addr_of sub in
+       let last_addr = find_last_addr sub in
+       AddrMap.add first_addr (last_addr) accu
+    )
+  in
+
+  let subroutines = BStd.Term.enum BStd.sub_t prog in
+  BStd.Seq.fold subroutines
+    ~init:AddrMap.empty
+    ~f:fold_subroutine
+
+let of_proj no_rbp_undef proj : subroutine_cfa_map =
  (** Extracts the `cfa_changes` of a project *)
+  __settings.no_rbp_undef <- no_rbp_undef ;
  let prog = BStd.Project.program proj in
-  of_prog prog
+  let sub_ranges = build_sub_ranges prog in
+  let next_instr_graph =
+    build_next_instr sub_ranges (BStd.Project.disasm proj) in
+  of_prog prog next_instr_graph

 let clean_lost_track_subs pre_dwarf : subroutine_cfa_map =
  (** Removes the subroutines on which we lost track from [pre_dwarf] *)
--- a/csmith/.gitignore
+++ b/csmith/.gitignore
@ -0,0 +1 @@
+0[0-9]/
--- a/csmith/check_generated_eh_frame.py
+++ b/csmith/check_generated_eh_frame.py
@ -32,10 +32,105 @@ def parse_fde_head(line):
    return pc_beg, pc_end


-def parse_fde_row(line, ra_col):
+def detect_clang_flat_to_pyramid(rows):
+    """ Artificially repair clang flat callee-saved saving to a gcc pyramid-like shape.
+
+    Eg., clang will generate
+       LOC           CFA      rbx   rbp   ra
+    0000000000007180 rsp+8    u     u     c-8
+    0000000000007181 rsp+16   u     u     c-8
+    0000000000007182 rsp+24   u     u     c-8
+    0000000000007189 rsp+7632 c-24  c-16  c-8
+
+
+    while we would wish to have
+       LOC           CFA      rbx   rbp   ra
+    0000000000007180 rsp+8    u     u     c-8
+    0000000000007181 rsp+16   u     c-16  c-8
+    0000000000007182 rsp+24   c-24  c-16  c-8
+    0000000000007189 rsp+7632 c-24  c-16  c-8
+
+    The repair strategy is as follows:
+    - ignore the implicit first row
+    - find the first k lines such that only CFA changes from line to line, with a delta
+      of +8, with address delta of at most 2. (a push is 1 or 2 bytes long)
+    - for every callee-saved R that concerns us and that is defined at line k+1 with
+      offset c-x, while rsp+x is the CFA of line k' <= k, redefine R as c-x in lines
+      [k'; k+1[
+
+    `rows` is a list of row dicts (keys "LOC", "CFA", "ra" and one key per register
+    column). The rows are modified in place when a repair applies, and the same list
+    is returned in every case.
+    """
+
+    # Columns that are not callee-saved registers
+    ignored_cols = ["LOC", "CFA", "ra"]
+
+    def try_starting_at(start_row):
+        """ Try to repair a flat run beginning at `start_row`.
+
+        Returns `(rows, modified)`; `rows` is left untouched when `modified` is False.
+        """
+        if len(rows) < start_row + 1:  # Ensure we have at least the start row
+            return rows, False
+
+        flatness_row_id = start_row
+        # The run must start on an rsp-based CFA. (Check the actual start row: this
+        # used to look at rows[1] whatever `start_row` was.)
+        if rows[start_row]["CFA"][:4] != "rsp+":
+            return rows, False
+        first_cfa = int(rows[start_row]["CFA"][4:])
+        prev_cfa = first_cfa
+        prev_loc = rows[start_row]["LOC"]
+        for row in rows[start_row + 1 :]:
+            # A row defining any callee-saved register ends the flat run. (A bare
+            # `break` inside a `for reg in row` loop would only leave that inner
+            # loop, hence the `any`.)
+            if any(row[reg] != "u" for reg in row if reg not in ignored_cols):
+                break
+            cfa = row["CFA"]
+            if cfa[:4] != "rsp+":
+                break
+            cfa_offset = int(cfa[4:])
+            if cfa_offset != prev_cfa + 8:
+                break
+            prev_cfa += 8
+            loc = row["LOC"]
+            if loc > prev_loc + 2:
+                break
+            prev_loc = loc
+            flatness_row_id += 1
+        # Index of the row right after the flat run: the one defining the registers
+        flatness_row_id += 1
+        if flatness_row_id - start_row <= 1 or flatness_row_id >= len(rows):
+            return rows, False  # nothing to change
+        flatness_row = rows[flatness_row_id]
+
+        # First collect every change, so that `rows` is only touched once we know
+        # the whole table has the expected shape
+        reg_changes = {}
+        for reg in flatness_row:
+            if reg in ignored_cols:
+                continue
+            rule = flatness_row[reg]
+            if rule[:2] != "c-":
+                return rows, False  # Not a flat_to_pyramid after all
+            rule_offset = int(rule[2:])
+            rule_offset_rectified = rule_offset - first_cfa
+            if rule_offset_rectified % 8 != 0:
+                return rows, False
+            row_change_id = rule_offset_rectified // 8 + start_row
+            if row_change_id < start_row:
+                # The save slot lies before the flat run we checked: do not rewrite
+                # rows outside of it (a negative index would even wrap around)
+                return rows, False
+            reg_changes[reg] = (row_change_id, rule)
+
+        for reg, (change_from, rule) in reg_changes.items():
+            for row in rows[change_from:flatness_row_id]:
+                row[reg] = rule
+
+        return rows, True
+
+    for start_row in [1, 2]:
+        mod_rows, modified = try_starting_at(start_row)
+        if modified:
+            return mod_rows
+    return rows
+
+
+def parse_fde_row(line, reg_cols):
    vals = list(map(lambda x: x.strip(), line.split()))
-    assert len(vals) > ra_col  # ra is the rightmost useful column
-    out = {"LOC": int(vals[0], 16), "CFA": vals[1], "ra": vals[ra_col]}
+    assert len(vals) > reg_cols["ra"]  # ra is the rightmost useful column
+
+    out = {"LOC": int(vals[0], 16), "CFA": vals[1]}
+
+    for reg in reg_cols:
+        col_id = reg_cols[reg]
+        out[reg] = vals[col_id]
+
+    if "rbp" not in out:
+        out["rbp"] = "u"
+
    return out


@ -47,7 +142,14 @@ def clean_rows(rows):
    out_rows = [rows[0]]
    for row in rows[1:]:
        if not row == out_rows[-1]:
-            out_rows.append(row)
+            filtered_row = row
+            filter_out = []
+            for reg in filtered_row:
+                if reg not in ["LOC", "CFA", "rbp", "ra"]:
+                    filter_out.append(reg)
+            for reg in filter_out:
+                filtered_row.pop(reg)
+            out_rows.append(filtered_row)
    return out_rows


@ -58,14 +160,28 @@ def parse_fde(lines):
    except NotFDE:
        return

-    rows = [{"LOC": 0, "CFA": "rsp+8", "ra": "c-8"}]  # Implicit CIE row
+    rows = [{"LOC": 0, "CFA": "rsp+8", "rbp": "u", "ra": "c-8"}]  # Implicit CIE row

    if len(lines) >= 2:  # Has content
        head_row = list(map(lambda x: x.strip(), lines[1].split()))
-        ra_col = head_row.index("ra")
+        reg_cols = {}
+        for pos, reg in enumerate(head_row):
+            if reg not in ["LOC", "CFA"]:
+                reg_cols[reg] = pos

        for line in lines[2:]:
-            rows.append(parse_fde_row(line, ra_col))
+            rows.append(parse_fde_row(line, reg_cols))
+
+    # if pc_beg == 0x1160:
+    #     print("===== FDE: {}..{} ====".format(hex(pc_beg), hex(pc_end)))
+    #     print("BEFORE:")
+    #     for row in rows:
+    #         print(row)
+    rows = detect_clang_flat_to_pyramid(rows)
+    # if pc_beg == 0x1160:
+    #     print("AFTER:")
+    #     for row in rows:
+    #         print(row)

    return {"beg": pc_beg, "end": pc_end, "rows": clean_rows(rows)}

@ -151,7 +267,7 @@ def dump_light_fdes(fdes):

 def match_fde(orig, synth):
    def vals_of(row):
-        return {"CFA": row["CFA"], "ra": row["ra"]}
+        return {"CFA": row["CFA"], "ra": row["ra"], "rbp": row["rbp"]}

    def loc_of(rch):
        return rch[1]["LOC"]
@ -165,7 +281,7 @@ def match_fde(orig, synth):
            rowchanges.append((typ, row))
    rowchanges.sort(key=loc_of)

-    matching = True
+    mismatch_count = 0
    for rowid, rowch in enumerate(rowchanges):
        typ, row = rowch[0], rowch[1]
        cur_val[typ] = vals_of(row)
@ -180,12 +296,17 @@ def match_fde(orig, synth):
                        hex(row["LOC"]), cur_val[0], cur_val[1]
                    )
                )
-            matching = False
+            mismatch_count += 1

-    return matching
+    return mismatch_count


 def parse_sym_table(handle):
+    def readint(x):
+        if x.startswith("0x"):
+            return int(x[2:], 16)
+        return int(x)
+
    out_map = {}
    for line in handle:
        line = line.strip()
@ -194,7 +315,7 @@ def parse_sym_table(handle):

        spl = list(map(lambda x: x.strip(), line.split()))
        loc = int(spl[1], 16)
-        size = int(spl[2])
+        size = readint(spl[2])
        name = spl[7]
        out_map[name] = (loc, size)
    return out_map
@ -227,15 +348,30 @@ def main():

    mismatches = 0
    for (orig, synth) in matched:
-        if not match_fde(orig, synth):
-            mismatches += 1
+        mismatches += match_fde(orig, synth)
    reports = []
    if mismatches > 0:
        reports.append("{} mismatches".format(mismatches))
    if unmatched_orig:
-        reports.append("{} unmatched (orig)".format(len(unmatched_orig)))
+        worth_reporting = False
+        for unmatched in unmatched_orig:
+            if len(unmatched["rows"]) > 1:
+                worth_reporting = True
+                break
+        if worth_reporting:
+            unmatched_addrs = [fde_pos(fde) for fde in unmatched_orig]
+            reports.append(
+                "{} unmatched (orig): {}".format(
+                    len(unmatched_orig), ", ".join(unmatched_addrs)
+                )
+            )
    if unmatched_synth:
-        reports.append("{} unmatched (synth)".format(len(unmatched_synth)))
+        unmatched_addrs = [fde_pos(fde) for fde in unmatched_synth]
+        reports.append(
+            "{} unmatched (synth): {}".format(
+                len(unmatched_synth), ", ".join(unmatched_addrs)
+            )
+        )

    if reports:
        print("{}: {}".format(test_name, "; ".join(reports)))
--- a/csmith/csmith_gen.sh
+++ b/csmith/csmith_gen.sh
@ -1,6 +1,12 @@
 #!/bin/bash

-USAGE="$0 output_dir nb_tests"
+USAGE="$0 output_dir nb_tests
+You may also set COMPILE_CMD to eg. 'gcc -O2' if you want to override the
+default command."
+
+if [ -z "$COMPILE_CMD" ] ; then
+    COMPILE_CMD='gcc -O2'
+fi

 if [ "$#" -lt 2 ] ; then
    >&2 echo -e "Missing argument(s). Usage:\n$USAGE"
@ -28,11 +34,11 @@ for _num in $(seq 1 $NB_TESTS); do
    csmith > "$path.c"
    sed -i 's/^static \(.* func_\)/\1/g' "$path.c"
    echo -ne "\r>>> $num.bin          "
-    gcc -O2 -I/usr/include/csmith-2.3.0/ -w "$path.c" -o "$path.orig.bin"
+    $COMPILE_CMD -I/usr/include/csmith-2.3.0/ -w "$path.c" -o "$path.orig.bin"
    objcopy --remove-section '.eh_frame' --remove-section '.eh_frame_hdr' \
        "$path.orig.bin" "$path.bin"
    echo -ne "\r>>> $num.eh.bin          "
-    ../synthesize_dwarf.sh "$path.bin" "$path.eh.bin"
+    BAP_ARGS='--dwarfsynth-no-rbp-undef' ../synthesize_dwarf.sh "$path.bin" "$path.eh.bin"

    if [ "$check_gen_eh_frame" -gt 0 ] ; then
        ./check_generated_eh_frame.sh "$path"
--- a/curiosities/.gitignore
+++ b/curiosities/.gitignore
@ -0,0 +1 @@
+*.bin
--- a/curiosities/gccO2_join_fail/24.c
+++ b/curiosities/gccO2_join_fail/24.c
--- a/curiosities/mov_rsp_rbp_falsepos/72.c
+++ b/curiosities/mov_rsp_rbp_falsepos/72.c
--- a/dwarfsynth.ml
+++ b/dwarfsynth.ml
@ -20,8 +20,18 @@ module Cmdline = struct
        ~default:"tmp.marshal"
    )

+  let no_rbp_undef = Cnf.(
+      param (bool) "no-rbp-undef"
+        ~doc:("Do not unset %rbp after it has been set once in a FDE. "
+              ^"This mimics gcc eh_frame for ease of validation.")
+        ~as_flag:true
+        ~default:false
+    )
+
  let () = Cnf.(
      when_ready ((fun {get=(!!)} ->
-          Bap.Std.Project.register_pass' (main !!outfile)))
+          Bap.Std.Project.register_pass' (main
+                                            ~no_rbp_undef:!!no_rbp_undef
+                                            !!outfile )))
    )
 end
--- a/synthesize_dwarf.sh
+++ b/synthesize_dwarf.sh
@ -35,7 +35,9 @@ function find_ml_dwarf_write {
 }

 function bap_synth {
-    bap "$INPUT_FILE" --no-byteweight -p dwarfsynth --dwarfsynth-output "$TMP_DIR/marshal" \
+    bap "$INPUT_FILE" \
+        --no-byteweight -p dwarfsynth \
+        --dwarfsynth-output "$TMP_DIR/marshal" $BAP_ARGS \
        > /dev/null
    return $?
 }
--- a/test/.gitignore
+++ b/test/.gitignore
@ -0,0 +1 @@
+*.bin
--- a/test/inline_asm.c
+++ b/test/inline_asm.c
@ -0,0 +1,19 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Runs a loop `upper_bound` times (first command-line argument); each
+ * iteration moves %rsp by hand through inline assembly. Prints the number of
+ * iterations performed.
+ * NOTE(review): presumably a fixture exercising %rsp tracking across inline
+ * asm the compiler knows nothing about -- confirm. */
+int main(int argc, char** argv) {
+    if(argc < 2) {
+        fprintf(stderr, "Missing argument: loop upper bound.\n");
+        exit(1);
+    }
+
+    int upper_bound = atoi(argv[1]);
+    int count = 0;
+    for(int i=0; i < upper_bound; ++i) {
+        /* Lower %rsp by 8 bytes by hand and store 42 in the new slot */
+        __asm__("sub $8, %rsp; movq $42, (%rsp)");
+        count++;
+        /* Give the 8 bytes back, undoing the `sub` above */
+        __asm__("add $8, %rsp");
+    }
+    printf("%d\n", count);
+    return 0;
+}
--- a/test/without_rbp_col.c
+++ b/test/without_rbp_col.c
@ -0,0 +1,28 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+
+/* For each cz in [1, z[, declares a variable-length array of cz ints and
+ * writes 8 in its middle cell.
+ * NOTE(review): the variable-length array is presumably there to make the
+ * compiler address the frame through %rbp -- confirm. */
+void rbp_bump_2(int z) {
+    for(int cz=1; cz < z; cz++) {
+        int x[cz];
+        x[cz / 2] = 8;
+    }
+}
+
+/* Same loop as `rbp_bump_2` over cy in [1, y[, additionally calling
+ * `rbp_bump_2` on the value just stored (always 8) while the variable-length
+ * array is live. */
+void rbp_bump_1(int y) {
+    for(int cy=1; cy < y; cy++) {
+        int x[cy];
+        x[cy / 2] = 8;
+        rbp_bump_2(x[cy/2]);
+    }
+}
+
+/* Entry point: parses n from the first command-line argument with `atoi` and
+ * runs `rbp_bump_1(n)`. Returns 1 when the argument is missing, 0 otherwise. */
+int main(int argc, char** argv) {
+    if(argc < 2) {
+        fprintf(stderr, "Missing argument: n\n");
+        return 1;
+    }
+    int num = atoi(argv[1]);
+    rbp_bump_1(num);
+    return 0;
+}
Author	SHA1	Message	Date
Théophile Bastian	6df057260d	Add uncommitted tests and curiosities	2019-04-05 16:39:21 +02:00
Théophile Bastian	11fa3fab40	Check_gen: patch for flat vs pyramid clang tables clang generates tables like this LOC CFA rbx rbp r14 r15 ra 00000000000028a0 rsp+8 u u u u c-8 00000000000028a1 rsp+16 u u u u c-8 00000000000028a3 rsp+24 u u u u c-8 00000000000028a5 rsp+32 u u u u c-8 00000000000028a6 rsp+40 u u u u c-8 00000000000028aa rsp+64 c-40 c-16 c-32 c-24 c-8	2019-04-05 16:32:00 +02:00
Théophile Bastian	344ac84ef3	Ignore ghost subroutines for clang A ghost subroutine is a subroutine having, in the IR representation, no content. At clang -O0, some might be generated, eg. <foo_func>: foo_addr nop which translates to Empty in BIL.	2019-04-05 11:23:18 +02:00
Théophile Bastian	6c18d9f537	Use rbp only on the subs where we need it	2019-04-05 11:23:13 +02:00
Théophile Bastian	29ab916c55	Fix next_instr_graph out-of-subroutine pointers The disasm-based next_instr_graph would introduce next instructions out of the current subroutine for eg. calls, jmp to plts, etc.	2019-04-04 19:47:36 +02:00
Théophile Bastian	5f7dfb6f5f	Use disasm to generate next_instr_graph	2019-04-04 13:49:39 +02:00
Théophile Bastian	3d336de196	Add flag to never go back to undefined rbp Once rbp has been set in the DWARF, if this flag is set, nothing will remove it from the table. This mimicks gcc and allows us to check easily our tables against theirs.	2019-04-04 11:52:47 +02:00
Théophile Bastian	4313ee91a7	Curiosities: `mov %rsp, %rbp` not switching to rbp tracking	2019-04-04 11:51:57 +02:00
Théophile Bastian	c3050da113	Csmith checker: check rbp column	2019-04-03 18:20:03 +02:00
Théophile Bastian	5d6929f84c	csmith_gen: add COMPILE_CMD option	2019-04-03 12:22:07 +02:00