Parse objdump output + test code
This commit is contained in:
parent
c54547dad9
commit
8038a92e89
3 changed files with 187 additions and 6 deletions
4
dune
Normal file
4
dune
Normal file
|
@ -0,0 +1,4 @@
|
|||
(include_subdirs unqualified)
|
||||
(executable
|
||||
(name test_objdump)
|
||||
(libraries linksem))
|
|
@ -27,11 +27,14 @@ exception ParseError of string
|
|||
|
||||
(** Pretty printers *)
|
||||
(* Prints [bytes_array] on [ppx] as two-digit lowercase hex bytes, each
   followed by one space, then pads with spaces up to objdump's column
   width. *)
let pp_hex_bytes ppx bytes_array =
  (* Objdump reserves 21 columns for the bytes, i.e. seven "xx " groups.
     Encodings longer than that get no padding at all: the count is clamped
     because String.make raises Invalid_argument on a negative length. *)
  let remaining_spaces = max 0 (21 - (3 * Bytes.length bytes_array)) in
  Bytes.iter
    (fun byte -> Format.fprintf ppx "%02x " (int_of_char byte))
    bytes_array;
  Format.fprintf ppx "%s" (String.make remaining_spaces ' ')
|
||||
|
||||
(* Prints one disassembled instruction on [ppx] as a single line: the address
   in hex, the encoding bytes (through [pp_hex_bytes]) and the textual
   assembly, separated by tabs. *)
let pp_asm_instr ppx { instr_addr; instr_bytes; instr_asm } =
  Format.fprintf ppx " %04x:\t%a\t%s@." instr_addr pp_hex_bytes instr_bytes
    instr_asm
|
||||
|
@ -123,9 +126,149 @@ let get_objdump prog_path =
|
|||
| _ ->
|
||||
raise (ObjdumpFailed (-1, (Bytes.to_string stderr_content)))
|
||||
|
||||
(** [ObjdumpAccu cur_info cur_section cur_sub_stub cur_asm] contains:
|
||||
- all the previously encountered and parsed subroutines in `cur_info`;
|
||||
- the current section name in `cur_section`;
|
||||
- the current subroutine's stub (ie. without asm) in `cur_sub_stub`, or
|
||||
None if the previous subroutine was committed;
|
||||
- the current subroutine's asm lines, most recent first, in `cur_asm`
|
||||
*)
|
||||
type interpret_objdump_accu =
|
||||
ObjdumpAccu of asm_info_t * string * asm_sub_t option * asm_t
|
||||
|
||||
(** Interprets the output of [objdump -d] and returns the subroutines it
    describes, indexed by symbol name.

    The first two lines of [objdump_out] are skipped. Every following line is
    classified by its first character:
    - an empty line ends the subroutine being read, if any;
    - a line starting with a space is an instruction of the current
      subroutine;
    - a line starting with ['D'] is a "Disassembly of section …" line;
    - any other line must be a subroutine header, ["ADDRESS <name>:"].

    A subroutine still being read when the input ends is kept, so a missing
    trailing newline is not an error.

    @raise ParseError if a line does not match the expected format, if lines
    come in an unexpected order, or if the input has fewer than two lines. *)
let interpret_objdump objdump_out : asm_info_t =
  (* Runs [parse ()], turning the exceptions Scanf may raise on malformed
     input into a [ParseError] whose message starts with [context]. *)
  let guard_parse context parse =
    try parse () with
    | Scanf.Scan_failure msg | Failure msg ->
        raise (ParseError (context ^ msg))
    | End_of_file -> raise (ParseError (context ^ "unexpected end of line"))
  in

  let invalid_state () = raise (ParseError "Reached invalid state") in

  (* Reads a string of bytes formatted like "01 23 ae 3f" and outputs a
     Bytes.t object. Repeated spaces between bytes are tolerated. *)
  let read_bytes_str bytes_str =
    let byte_of_token token =
      let invalid_byte () =
        raise (ParseError (Format.sprintf "Invalid byte: \"%s\"" token))
      in
      match int_of_string ("0x" ^ token) with
      | value when 0 <= value && value <= 0xff -> Char.chr value
      | _ -> invalid_byte ()
      | exception Failure _ -> invalid_byte ()
    in
    let tokens =
      String.split_on_char ' ' bytes_str
      |> List.filter (fun token -> token <> "")
    in
    if tokens = [] then
      raise (ParseError "Instruction line without any encoding byte");
    let bytes_out = Bytes.create (List.length tokens) in
    List.iteri
      (fun idx token -> Bytes.set bytes_out idx (byte_of_token token))
      tokens;
    bytes_out
  in

  (* Parses one instruction line.
     Expected format: " HEX: *\t\(HEX \)+ *\(\t.*\)?$"
     where the first HEX is the address, the list of HEX (byte by byte) is
     the binary encoding of the instruction, and the last, optional part is
     its human-readable counterpart. *)
  let parse_asm_line cur_line =
    let invalid_line () =
      raise
        (ParseError
           (Format.sprintf "Invalid subroutine line: \"%s\"" cur_line))
    in
    match String.split_on_char '\t' cur_line with
    | addr_str :: bytes_str :: rest ->
        guard_parse "Parsing subroutine line: " (fun () ->
            let instr_addr = Scanf.sscanf addr_str " %x:" (fun x -> x) in
            let instr_bytes = read_bytes_str (String.trim bytes_str) in
            let instr_asm =
              match rest with
              | [] -> ""
              | [ str ] -> String.trim str
              | _ -> invalid_line ()
            in
            { instr_addr; instr_bytes; instr_asm })
    | _ -> invalid_line ()
  in

  (* Adds the subroutine stub [in_flight] to [cur_info], completed with its
     instructions. [asm] was accumulated most recent first, hence the
     reversal. *)
  let commit cur_info in_flight asm =
    let full_sub = { in_flight with sub_asm = List.rev asm } in
    StrMap.add full_sub.sub_name full_sub cur_info
  in

  (* Actually aggregates data line by line *)
  let aggregate_info objdump_accu cur_line =
    let (ObjdumpAccu (cur_info, cur_section, in_flight, asm)) = objdump_accu in
    if cur_line = "" then (
      (* Empty line: commit the current in-flight subroutine if any, reset *)
      match (in_flight, asm) with
      | None, [] -> objdump_accu (* Nothing to be done *)
      | Some sub, _ ->
          ObjdumpAccu (commit cur_info sub asm, cur_section, None, [])
      | None, _ :: _ ->
          (* Instructions without a subroutine: corrupt state. *)
          invalid_state ())
    else
      match cur_line.[0] with
      | ' ' -> (
          (* Indented line: some asm line in a subroutine. The line is parsed
             before the state is checked, so a malformed line is reported as
             such. *)
          let line = parse_asm_line cur_line in
          match in_flight with
          | Some _ ->
              ObjdumpAccu (cur_info, cur_section, in_flight, line :: asm)
          | None -> invalid_state ())
      | 'D' -> (
          (* "Disassembly of section …" line *)
          let new_section =
            guard_parse "Parsing section boundary line: " (fun () ->
                Scanf.sscanf cur_line "Disassembly of section %s@:"
                  (fun x -> x))
          in
          match (in_flight, asm) with
          | None, [] -> ObjdumpAccu (cur_info, new_section, None, [])
          | _ ->
              (* We should not change section in this state -- corrupt *)
              invalid_state ())
      | _ -> (
          (* Other line: it should only be a subroutine header *)
          let sub_addr, sub_name =
            guard_parse "Parsing subroutine header line: " (fun () ->
                Scanf.sscanf cur_line "%016x <%s@>:" (fun addr name ->
                    (addr, name)))
          in
          match (in_flight, asm) with
          | None, [] ->
              let stub =
                { sub_section = cur_section; sub_name; sub_addr; sub_asm = [] }
              in
              ObjdumpAccu (cur_info, cur_section, Some stub, [])
          | _ ->
              (* We should not start a subroutine while another one is still
                 being read -- corrupt *)
              invalid_state ())
  in

  (* Two first lines are meaningless for us *)
  let body_lines =
    match String.split_on_char '\n' objdump_out with
    | _ :: _ :: body -> body
    | _ -> raise (ParseError "Truncated objdump output")
  in

  let (ObjdumpAccu (cur_info, _, in_flight, asm)) =
    List.fold_left aggregate_info
      (ObjdumpAccu (StrMap.empty, "", None, []))
      body_lines
  in
  match (in_flight, asm) with
  | None, [] -> cur_info
  | Some sub, _ ->
      (* The input ended without a final empty line: commit what was read. *)
      commit cur_info sub asm
  | None, _ :: _ -> raise (ParseError "Invalid end state")
|
||||
|
|
34
src/test_objdump.ml
Normal file
34
src/test_objdump.ml
Normal file
|
@ -0,0 +1,34 @@
|
|||
(* Handler for uncaught exceptions: prints the exception and [backtrace] on
   stderr, then exits with status 1.

   The exception is rendered with Printexc.to_string so that its arguments
   are shown whatever the exception is, not only for Failure. *)
let uncaught_exn exc backtrace =
  Format.eprintf "Uncaught exception %s. Backtrace:@.%s@."
    (Printexc.to_string exc)
    (Printexc.raw_backtrace_to_string backtrace);
  exit 1
|
||||
|
||||
(* Reads the whole content of the channel [handle] and returns it as a
   string. The length is taken from in_channel_length, so [handle] must be a
   regular file.

   Raises Failure if fewer bytes than the announced length can be read. *)
let read_all handle =
  let len = in_channel_length handle in
  (* really_input_string keeps reading until [len] bytes were obtained; a
     single call to input may return fewer bytes even before the end of the
     file. *)
  try really_input_string handle len
  with End_of_file -> raise (Failure "Could not read whole file")
|
||||
|
||||
(* Test entry point: parses an objdump dump and pretty-prints the result on
   stderr. The dump is read from the path given as first command-line
   argument, or from /tmp/objdump_dump when there is none. *)
let () =
  Printexc.record_backtrace true;
  Printexc.set_uncaught_exception_handler uncaught_exn;
  let in_path =
    if Array.length Sys.argv > 1 then Sys.argv.(1) else "/tmp/objdump_dump"
  in
  let in_handle = open_in in_path in
  (* Close the channel whether or not reading succeeds. *)
  let objdump_out =
    match read_all in_handle with
    | contents ->
        close_in in_handle;
        contents
    | exception exn ->
        close_in_noerr in_handle;
        raise exn
  in
  let parsed = Asm_acquire.interpret_objdump objdump_out in
  Format.eprintf "%a" Asm_acquire.pp_asm_info parsed
  (*
  List.iter (fun asm_sub ->
      Format.eprintf "%a" Asm_acquire.pp_asm_sub asm_sub)
    parsed
  *)
|
Loading…
Reference in a new issue