From 8038a92e897fc9eaedcea6b4644bd7d8e6aaa233 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9ophile=20Bastian?= Date: Fri, 27 Sep 2019 16:38:31 +0200 Subject: [PATCH] Parse objdump output + test code --- dune | 4 ++ src/asm_acquire.ml | 155 ++++++++++++++++++++++++++++++++++++++++++-- src/test_objdump.ml | 34 ++++++++++ 3 files changed, 187 insertions(+), 6 deletions(-) create mode 100644 dune create mode 100644 src/test_objdump.ml diff --git a/dune b/dune new file mode 100644 index 0000000..835a970 --- /dev/null +++ b/dune @@ -0,0 +1,4 @@ +(include_subdirs unqualified) +(executable + (name test_objdump) + (libraries linksem)) diff --git a/src/asm_acquire.ml b/src/asm_acquire.ml index fb3c21b..ac0fa24 100644 --- a/src/asm_acquire.ml +++ b/src/asm_acquire.ml @@ -27,11 +27,14 @@ exception ParseError of string (** Pretty printers *) let pp_hex_bytes ppx bytes_array = - Bytes.iter (fun byte -> Format.fprintf ppx "%02x" (int_of_char byte)) - bytes_array + (* Padding printed after the bytes: each byte takes 3 characters and the column is filled up to 21. Objdump prints 21. NOTE(review): negative for more than 7 bytes, which makes String.make raise Invalid_argument. *) + let remaining_spaces = 21 - 3 * (Bytes.length bytes_array) in + Bytes.iter (fun byte -> Format.fprintf ppx "%02x " (int_of_char byte)) + bytes_array ; + Format.fprintf ppx "%s" (String.make remaining_spaces ' ') let pp_asm_instr ppx asm_instr = - Format.fprintf ppx "%04x:\t%a\t%s@." + Format.fprintf ppx " %04x:\t%a\t%s@." asm_instr.instr_addr pp_hex_bytes asm_instr.instr_bytes asm_instr.instr_asm @@ -123,9 +126,149 @@ let get_objdump prog_path = | _ -> raise (ObjdumpFailed (-1, (Bytes.to_string stderr_content))) +(** [ObjdumpAccu cur_info cur_section cur_sub_stub cur_asm] contains: + - all the previously encountered and parsed subroutines in `cur_info`; + - the current section name in `cur_section`; + - the current subroutine's stub (i.e. 
without asm) in `cur_sub_stub`, or + None if the previous subroutine was committed; + - the current subroutine's asm lines in `cur_asm`, most recent first. +*) +type interpret_objdump_accu = + ObjdumpAccu of asm_info_t * string * asm_sub_t option * asm_t + (** Interprets `objdump -d` output and yield a list of functions, alongside with their addresses, symbol names, asm, … *) let interpret_objdump objdump_out : asm_info_t = - (* TODO *) - ignore objdump_out; - assert false + + (* Reads a string of bytes formatted like "01 23 ae 3f" and outputs a Bytes.t + object *) + let read_bytes_str bytes_str = + let concat_chars chars = + String.concat "" (List.map (String.make 1) chars) + in + let bytes_out = + String.split_on_char ' ' bytes_str + |> List.map (fun x -> char_of_int @@ int_of_string ("0x" ^ x)) + |> concat_chars + |> Bytes.of_string + in + bytes_out + in + + (* Fold step: consumes one line and returns the updated accumulator *) + let aggregate_info objdump_accu cur_line = + let get_char_opt str pos = + (try + Some (String.get str pos) + with Invalid_argument _ -> None) + in + + (match get_char_opt cur_line 0 with + (* Empty string *) + | None -> (* Commit current in-flight subroutine if any, reset *) + (match objdump_accu with + | ObjdumpAccu(_, _, None, []) -> + objdump_accu (* Nothing to be done *) + | ObjdumpAccu(cur_info, cur_section, Some in_flight, asm) -> + (* Commit in-flight *) + let full_sub = { in_flight with sub_asm = List.rev asm } in + ObjdumpAccu( + StrMap.add full_sub.sub_name full_sub cur_info, + cur_section, + None, + [] + ) + | ObjdumpAccu(_, _, None, _) -> + (* This state should not be reachable. Corrupt state. *) + raise (ParseError "Reached invalid state") + ) + (* Indented line: some asm line in a subroutine *) + | Some ' ' -> + (* Expected format: " HEX: *\t\(HEX \)+ *\(\t.*\)?$" + where the first HEX is the address, + the list of HEX (byte by byte) is the binary encoding of the + instruction, + the last optional part is its human-readable counterpart. 
+ *) + (match String.split_on_char '\t' cur_line with + | addr_str :: bytes_str :: tl -> + (try + let addr = Scanf.sscanf addr_str " %x:" (fun x -> x) in + let bytes_repr = read_bytes_str @@ String.trim bytes_str in + let asm_repr = (match tl with + | [] -> "" + | str::[] -> String.trim str + | _ -> raise ( + ParseError + (Format.sprintf "Invalid subroutine line: \"%s\"" + cur_line)) + ) in + let line = { + instr_addr = addr; + instr_bytes = bytes_repr; + instr_asm = asm_repr + } in + (match objdump_accu with + | ObjdumpAccu(cur_info, cur_section, Some in_flight, asm) -> + ObjdumpAccu(cur_info, cur_section, Some in_flight, line::asm) + | ObjdumpAccu(_, _, None, _) -> + (* Asm line outside of any subroutine. Corrupt state. *) + raise (ParseError "Reached invalid state") + ) + with Scanf.Scan_failure msg -> + raise (ParseError ("Parsing subroutine line: " ^ msg)) + ) + | _ -> + raise (ParseError (Format.sprintf "Invalid subroutine line: \"%s\"" + cur_line)) + ) + (* "Disassembly of section …" line *) + | Some 'D' -> + let new_section = ( + try Scanf.sscanf cur_line "Disassembly of section %s@:" (fun x -> x) + with Scanf.Scan_failure msg -> + raise (ParseError ("Parsing section boundary line: " ^ msg))) + in + (match objdump_accu with + | ObjdumpAccu(cur_info, _, None, []) -> + ObjdumpAccu(cur_info, new_section, None, []) + | _ -> (* We should not change section in this state -- corrupt *) + raise (ParseError "Reached invalid state") + ) + (* Other line: it should only be a subroutine header *) + | _ -> + let sub_address, sub_name = + (try Scanf.sscanf cur_line "%016x <%s@>:" + (fun addr name -> (addr, name)) + with Scanf.Scan_failure msg -> + raise (ParseError ("Parsing subroutine header line: " ^ msg)) + ) in + (match objdump_accu with + | ObjdumpAccu(cur_info, cur_section, None, []) -> + ObjdumpAccu(cur_info, cur_section, Some { + sub_section = cur_section; + sub_name = sub_name; + sub_addr = sub_address; + sub_asm = []; + }, []) + | _ -> (* We should not start a 
subroutine in this state -- corrupt *) + raise (ParseError "Reached invalid state") + ) + ) + in + + let rec drop_k k lst = match (k, lst) with + | 0, lst -> lst + | k, (_::tl) -> drop_k (k-1) tl + | _, [] -> raise (Invalid_argument "drop_k on empty list") + in + + let result = + String.split_on_char '\n' objdump_out + |> drop_k 2 (* The first two lines are meaningless for us; drop_k raises Invalid_argument if there are fewer *) + |> List.fold_left aggregate_info + (ObjdumpAccu(StrMap.empty, "", None, [])) + in + (match result with + | ObjdumpAccu(result, _, None, []) -> result + | _ -> raise (ParseError "Invalid end state")) diff --git a/src/test_objdump.ml b/src/test_objdump.ml new file mode 100644 index 0000000..eb6bd25 --- /dev/null +++ b/src/test_objdump.ml @@ -0,0 +1,34 @@ +let uncaught_exn exc backtrace = + let descr = (match exc with + | Failure msg -> Format.sprintf " (%s)" msg + | _ -> "" + ) in + Format.eprintf "Uncaught exception %s%s. Backtrace:@.%s@." + (Printexc.exn_slot_name exc) + descr + (Printexc.raw_backtrace_to_string backtrace) ; + exit 1 + +let read_all handle = + let len = in_channel_length handle in + let buffer = Bytes.create len in + let bytes_read = input handle buffer 0 len in + if bytes_read <> len then + raise (Failure "Could not read whole file") + else + Bytes.to_string buffer + +let _ = + Printexc.record_backtrace true ; + Printexc.set_uncaught_exception_handler uncaught_exn ; + let in_path = "/tmp/objdump_dump" in + let in_handle = open_in in_path in + let objdump_out = read_all in_handle in + close_in in_handle ; + let parsed = Asm_acquire.interpret_objdump objdump_out in + Format.eprintf "%a" Asm_acquire.pp_asm_info parsed + (* + List.iter (fun asm_sub -> + Format.eprintf "%a" Asm_acquire.pp_asm_sub asm_sub) + parsed + *)