2019-09-26 17:22:40 +02:00
|
|
|
(** Module to acquire the ASM of a given ELF object by its path
|
|
|
|
|
|
|
|
Uses `objdump -d` internally, and parses the output
|
|
|
|
*)
|
|
|
|
|
|
|
|
exception ParseError of string
|
|
|
|
|
2019-11-18 11:34:14 +01:00
|
|
|
module AsmTypes = Asm_info.NoAnnot
|
|
|
|
|
2019-09-26 17:22:40 +02:00
|
|
|
(** Pretty printers *)
|
|
|
|
let pp_hex_bytes ppx bytes_array =
|
2019-09-27 16:38:31 +02:00
|
|
|
(* Number of lone spaces to be printed after the bytes. Objdump prints 21. *)
|
|
|
|
let remaining_spaces = 21 - 3 * (Bytes.length bytes_array) in
|
|
|
|
Bytes.iter (fun byte -> Format.fprintf ppx "%02x " (int_of_char byte))
|
|
|
|
bytes_array ;
|
|
|
|
Format.fprintf ppx "%s" (String.make remaining_spaces ' ')
|
2019-09-26 17:22:40 +02:00
|
|
|
|
|
|
|
let pp_asm_instr ppx asm_instr =
|
2019-11-18 11:34:14 +01:00
|
|
|
AsmTypes.(
|
|
|
|
Format.fprintf ppx " %04x:\t%a\t%s@."
|
|
|
|
asm_instr.instr_addr
|
|
|
|
pp_hex_bytes asm_instr.instr_bytes
|
|
|
|
asm_instr.instr_asm
|
|
|
|
)
|
2019-09-26 17:22:40 +02:00
|
|
|
|
|
|
|
let pp_asm ppx asm_instrs = List.iter (pp_asm_instr ppx) asm_instrs
|
|
|
|
|
|
|
|
let pp_asm_sub ppx asm_sub =
|
2019-11-18 11:34:14 +01:00
|
|
|
AsmTypes.(
|
|
|
|
Format.fprintf ppx "%016x <%s> {%s}:@.@[%a@]@."
|
|
|
|
asm_sub.sub_addr
|
|
|
|
asm_sub.sub_name
|
|
|
|
asm_sub.sub_section
|
|
|
|
pp_asm asm_sub.sub_asm
|
|
|
|
)
|
2019-09-26 17:22:40 +02:00
|
|
|
|
|
|
|
let pp_asm_info ppx asm_info =
|
2019-09-30 09:26:06 +02:00
|
|
|
List.iter (fun asm_sub -> Format.fprintf ppx "%a" pp_asm_sub asm_sub)
|
2019-09-26 17:22:40 +02:00
|
|
|
asm_info
|
|
|
|
|
|
|
|
(** Reads the whole content of a Unix file descriptor, returning it as a Bytes
|
|
|
|
sequence *)
|
|
|
|
let read_all_fd fd =
|
|
|
|
Unix.set_nonblock fd ;
|
|
|
|
|
|
|
|
let rec do_read accu cur_size =
|
|
|
|
let n_accu = Bytes.extend accu 0 256 in
|
|
|
|
let bytes_read = (
|
|
|
|
try
|
|
|
|
Unix.read fd n_accu cur_size 256
|
|
|
|
with
|
|
|
|
| Unix.Unix_error(Unix.EAGAIN, _, _)
|
|
|
|
| Unix.Unix_error(Unix.EWOULDBLOCK, _, _) ->
|
|
|
|
0
|
|
|
|
) in
|
|
|
|
|
|
|
|
if bytes_read = 256 then
|
|
|
|
do_read n_accu (cur_size + 256)
|
|
|
|
else (
|
|
|
|
Bytes.extend n_accu 0 (bytes_read - 256)
|
|
|
|
)
|
|
|
|
in
|
|
|
|
do_read (Bytes.create 0) 0
|
|
|
|
|
|
|
|
|
|
|
|
(** Raised when the call to objdump failed. An exception
|
|
|
|
[ObjdumpFailed (status_code, stderr)] will contain the status code of the
|
|
|
|
objdump process, and its stderr contents as a string. *)
|
|
|
|
exception ObjdumpFailed of int * string
|
|
|
|
|
|
|
|
|
|
|
|
(** Runs `objdump -d prog_path` and returns its stdout as a string *)
|
|
|
|
let get_objdump prog_path =
|
|
|
|
(* Setup process *)
|
|
|
|
let stdin_read, stdin = Unix.pipe () in
|
|
|
|
let stdout, stdout_write = Unix.pipe () in
|
|
|
|
let stderr, stderr_write = Unix.pipe () in
|
|
|
|
|
|
|
|
(* Run the process *)
|
|
|
|
Format.eprintf "Running objdump...@." ;
|
|
|
|
let objdump_pid = Unix.create_process
|
|
|
|
"objdump"
|
|
|
|
[| "objdump" ; "-d" ; prog_path |]
|
|
|
|
stdin_read stdout_write stderr_write
|
|
|
|
in
|
|
|
|
(* Close stdin already: we won't send anything and make it clear *)
|
|
|
|
Unix.close stdin ;
|
|
|
|
Format.eprintf "\tPID = %d@." objdump_pid;
|
|
|
|
|
|
|
|
let pid, status = Unix.waitpid [] objdump_pid in
|
|
|
|
if pid <> objdump_pid then
|
|
|
|
raise (Failure ("Could not properly wait on objdump")) ;
|
|
|
|
|
|
|
|
Format.eprintf "Run objdump: done. Status: %a@." (fun ppx sc -> match sc with
|
|
|
|
| Unix.WEXITED ex_code -> Format.fprintf ppx "%d" ex_code
|
|
|
|
| _ -> Format.fprintf ppx "Not exited") status ;
|
|
|
|
|
|
|
|
(* Extract stdout, stderr data *)
|
|
|
|
let stdout_content = read_all_fd stdout in
|
|
|
|
let stderr_content = read_all_fd stderr in
|
|
|
|
|
|
|
|
(* Properly close pipes *)
|
|
|
|
Unix.close stdout ;
|
|
|
|
Unix.close stderr ;
|
|
|
|
|
|
|
|
(* Check status code *)
|
|
|
|
match status with
|
|
|
|
| Unix.WEXITED 0 ->
|
|
|
|
Bytes.to_string stdout_content
|
|
|
|
| Unix.WEXITED err_code ->
|
|
|
|
raise (ObjdumpFailed (err_code, (Bytes.to_string stderr_content)))
|
|
|
|
| _ ->
|
|
|
|
raise (ObjdumpFailed (-1, (Bytes.to_string stderr_content)))
|
|
|
|
|
2019-09-27 16:38:31 +02:00
|
|
|
(** [ObjdumpAccu cur_info cur_section cur_sub_stub cur_asm] contains:
|
|
|
|
- all the previously encoutered and parsed subroutines in `cur_info`;
|
|
|
|
- the current section name in `cur_section`;
|
|
|
|
- the current subroutine's stub (ie. without asm) in `cur_sub_stub`, or
|
|
|
|
None if the previous subroutine was committed;
|
|
|
|
- the current subroutine's asm lines
|
|
|
|
*)
|
2019-11-18 11:34:14 +01:00
|
|
|
|
2019-09-27 16:38:31 +02:00
|
|
|
type interpret_objdump_accu =
|
2019-11-18 11:34:14 +01:00
|
|
|
ObjdumpAccu of AsmTypes.asm_info_t * string * AsmTypes.asm_sub_t option *
|
|
|
|
AsmTypes.asm_t
|
2019-09-27 16:38:31 +02:00
|
|
|
|
2019-09-26 17:22:40 +02:00
|
|
|
(** Interprets `objdump -d` output and yield a list of functions, alongside
|
|
|
|
with their addresses, symbol names, asm, … *)
|
2019-11-18 11:34:14 +01:00
|
|
|
let interpret_objdump objdump_out : AsmTypes.asm_info_t =
|
2019-09-27 16:38:31 +02:00
|
|
|
|
|
|
|
(* Reads a string of bytes formatted like "01 23 ae 3f" and output a Bytes.t
|
|
|
|
object *)
|
|
|
|
let read_bytes_str bytes_str =
|
|
|
|
let concat_chars chars =
|
|
|
|
String.concat "" (List.map (String.make 1) chars)
|
|
|
|
in
|
|
|
|
let bytes_out =
|
|
|
|
String.split_on_char ' ' bytes_str
|
|
|
|
|> List.map (fun x -> char_of_int @@ int_of_string ("0x" ^ x))
|
|
|
|
|> concat_chars
|
|
|
|
|> Bytes.of_string
|
|
|
|
in
|
|
|
|
bytes_out
|
|
|
|
in
|
|
|
|
|
|
|
|
(* Actually aggregates data line by line *)
|
|
|
|
let aggregate_info objdump_accu cur_line =
|
|
|
|
let get_char_opt str pos =
|
|
|
|
(try
|
|
|
|
Some (String.get str pos)
|
|
|
|
with Invalid_argument _ -> None)
|
|
|
|
in
|
|
|
|
|
|
|
|
(match get_char_opt cur_line 0 with
|
|
|
|
(* Empty string *)
|
|
|
|
| None -> (* Commit current in-flight subroutine if any, reset *)
|
|
|
|
(match objdump_accu with
|
|
|
|
| ObjdumpAccu(_, _, None, []) ->
|
|
|
|
objdump_accu (* Nothing to be done *)
|
|
|
|
| ObjdumpAccu(cur_info, cur_section, Some in_flight, asm) ->
|
|
|
|
(* Commit in-flight *)
|
|
|
|
let full_sub = { in_flight with sub_asm = List.rev asm } in
|
|
|
|
ObjdumpAccu(
|
2019-09-30 09:26:06 +02:00
|
|
|
full_sub::cur_info,
|
2019-09-27 16:38:31 +02:00
|
|
|
cur_section,
|
|
|
|
None,
|
|
|
|
[]
|
|
|
|
)
|
|
|
|
| ObjdumpAccu(_, _, None, _) ->
|
|
|
|
(* This state should not be reachable. Corrupt state. *)
|
|
|
|
raise (ParseError "Reached invalid state")
|
|
|
|
)
|
|
|
|
(* Indented line: some asm line in a subroutine *)
|
|
|
|
| Some ' ' ->
|
|
|
|
(* Expected format: " HEX: *\t\(HEX \)+ *\(\t.*\)?$"
|
|
|
|
where the first HEX is the address,
|
|
|
|
the list of HEX (byte by byte) is the binary encoding of the
|
|
|
|
instruction,
|
|
|
|
the last optionnal part is its human-readable counterpart.
|
|
|
|
*)
|
|
|
|
(match String.split_on_char '\t' cur_line with
|
|
|
|
| addr_str :: bytes_str :: tl ->
|
|
|
|
(try
|
|
|
|
let addr = Scanf.sscanf addr_str " %x:" (fun x -> x) in
|
|
|
|
let bytes_repr = read_bytes_str @@ String.trim bytes_str in
|
|
|
|
let asm_repr = (match tl with
|
|
|
|
| [] -> ""
|
|
|
|
| str::[] -> String.trim str
|
|
|
|
| _ -> raise (
|
|
|
|
ParseError
|
|
|
|
(Format.sprintf "Invalid subroutine line: \"%s\""
|
|
|
|
cur_line))
|
|
|
|
) in
|
2019-11-18 11:34:14 +01:00
|
|
|
let line = AsmTypes.({
|
2019-09-27 16:38:31 +02:00
|
|
|
instr_addr = addr;
|
|
|
|
instr_bytes = bytes_repr;
|
2019-11-18 11:34:14 +01:00
|
|
|
instr_asm = asm_repr;
|
|
|
|
instr_annot = ();
|
|
|
|
}) in
|
2019-09-27 16:38:31 +02:00
|
|
|
(match objdump_accu with
|
|
|
|
| ObjdumpAccu(cur_info, cur_section, Some in_flight, asm) ->
|
|
|
|
ObjdumpAccu(cur_info, cur_section, Some in_flight, line::asm)
|
|
|
|
| ObjdumpAccu(_, _, None, _) ->
|
|
|
|
(* This state should not be reachable. Corrupt state. *)
|
|
|
|
raise (ParseError "Reached invalid state")
|
|
|
|
)
|
|
|
|
with Scanf.Scan_failure msg ->
|
|
|
|
raise (ParseError ("Parsing subroutine line: " ^ msg))
|
|
|
|
)
|
|
|
|
| _ ->
|
|
|
|
raise (ParseError (Format.sprintf "Invalid subroutine line: \"%s\""
|
|
|
|
cur_line))
|
|
|
|
)
|
|
|
|
(* "Disassembly of section …" line *)
|
|
|
|
| Some 'D' ->
|
|
|
|
let new_section = (
|
|
|
|
try Scanf.sscanf cur_line "Disassembly of section %s@:" (fun x -> x)
|
|
|
|
with Scanf.Scan_failure msg ->
|
|
|
|
raise (ParseError ("Parsing section boundary line: " ^ msg)))
|
|
|
|
in
|
|
|
|
(match objdump_accu with
|
|
|
|
| ObjdumpAccu(cur_info, _, None, []) ->
|
|
|
|
ObjdumpAccu(cur_info, new_section, None, [])
|
|
|
|
| _ -> (* We should not change section in this state -- corrupt *)
|
|
|
|
raise (ParseError "Reached invalid state")
|
|
|
|
)
|
|
|
|
(* Other line: it should only be a subroutine header *)
|
|
|
|
| _ ->
|
|
|
|
let sub_address, sub_name =
|
|
|
|
(try Scanf.sscanf cur_line "%016x <%s@>:"
|
|
|
|
(fun addr name -> (addr, name))
|
|
|
|
with Scanf.Scan_failure msg ->
|
|
|
|
raise (ParseError ("Parsing subroutine header line: " ^ msg))
|
|
|
|
) in
|
|
|
|
(match objdump_accu with
|
|
|
|
| ObjdumpAccu(cur_info, cur_section, None, []) ->
|
|
|
|
ObjdumpAccu(cur_info, cur_section, Some {
|
|
|
|
sub_section = cur_section;
|
|
|
|
sub_name = sub_name;
|
|
|
|
sub_addr = sub_address;
|
|
|
|
sub_asm = [];
|
|
|
|
}, [])
|
|
|
|
| _ -> (* We should not change section in this state -- corrupt *)
|
|
|
|
raise (ParseError "Reached invalid state")
|
|
|
|
)
|
|
|
|
)
|
|
|
|
in
|
|
|
|
|
|
|
|
let rec drop_k k lst = match (k, lst) with
|
|
|
|
| 0, lst -> lst
|
|
|
|
| k, (_::tl) -> drop_k (k-1) tl
|
|
|
|
| _, [] -> raise (Invalid_argument "drop_k on empty list")
|
|
|
|
in
|
|
|
|
|
|
|
|
let result =
|
|
|
|
String.split_on_char '\n' objdump_out
|
|
|
|
|> drop_k 2 (* Two first lines are meaningless for us *)
|
|
|
|
|> List.fold_left aggregate_info
|
2019-09-30 09:26:06 +02:00
|
|
|
(ObjdumpAccu([], "", None, []))
|
2019-09-27 16:38:31 +02:00
|
|
|
in
|
|
|
|
(match result with
|
2019-09-30 09:26:06 +02:00
|
|
|
| ObjdumpAccu(result, _, None, []) ->
|
|
|
|
result
|
|
|
|
|> List.rev
|
2019-09-27 16:38:31 +02:00
|
|
|
| _ -> raise (ParseError "Invalid end state"))
|
2019-09-27 16:45:08 +02:00
|
|
|
|
|
|
|
|
|
|
|
(** Extract asm infos from an ELF file path *)
|
|
|
|
let acquire_asm elf_path =
|
|
|
|
get_objdump elf_path
|
|
|
|
|> interpret_objdump
|