(* Provenance: src/asm_acquire.ml, as introduced by commit
   c54547dad90de7f05ceaf590f017f0f7d76f44d8 "Add asm acquisition boilerplate"
   (Théophile Bastian, Thu, 26 Sep 2019 17:22:40 +0200). *)

(** Module to acquire the ASM of a given ELF object by its path

    Uses `objdump -d` internally, and parses the output
*)

module StrMap = Map.Make(String)

(** A memory address *)
type addr_t = int

(** A single asm instruction *)
type asm_instr_t = {
  instr_addr: addr_t;     (** Memory location of this instruction *)
  instr_bytes: Bytes.t;   (** Binary representation of the instruction *)
  instr_asm: string;      (** Asm for the instruction (eg `movq …`) *)
}

(** A sequence of instructions, printed in list order by [pp_asm] *)
type asm_t = asm_instr_t list

(** A named chunk of assembly, as printed by [pp_asm_sub] *)
type asm_sub_t = {
  sub_section: string;    (** Section name (presumably the ELF section) *)
  sub_name: string;       (** Symbol name of the subroutine *)
  sub_addr: addr_t;       (** Address printed in the subroutine header *)
  sub_asm: asm_t;         (** Instructions making up the subroutine *)
}

(** Subroutines indexed by a string key (presumably the symbol name; nothing in
    this module builds such a map yet, so the key is not fixed here) *)
type asm_info_t = asm_sub_t StrMap.t

(** Parse failure carrying a message. Declared for the objdump parser; no code
    in this module raises it yet. *)
exception ParseError of string

(** Pretty printers *)

(** Prints every byte of [bytes_array] as two lowercase hex digits, without
    any separator. *)
let pp_hex_bytes ppx bytes_array =
  let pp_byte byte = Format.fprintf ppx "%02x" (Char.code byte) in
  Bytes.iter pp_byte bytes_array

(** Prints one instruction as address, raw bytes and asm text separated by
    tabs, followed by a newline and a flush of [ppx]. *)
let pp_asm_instr ppx asm_instr =
  Format.fprintf ppx "%04x:\t%a\t%s@."
    asm_instr.instr_addr
    pp_hex_bytes asm_instr.instr_bytes
    asm_instr.instr_asm

(** Prints each instruction of [asm_instrs] with [pp_asm_instr], in order. *)
let pp_asm ppx asm_instrs = List.iter (pp_asm_instr ppx) asm_instrs

(** Prints a header line (address, name between angle brackets, section between
    braces) followed by the instructions of the subroutine and an empty
    line. *)
let pp_asm_sub ppx asm_sub =
  Format.fprintf ppx "%016x <%s> {%s}:@.@[%a@]@."
    asm_sub.sub_addr
    asm_sub.sub_name
    asm_sub.sub_section
    pp_asm asm_sub.sub_asm

(** Prints every subroutine of [asm_info] with [pp_asm_sub], in the key order
    of the map. *)
let pp_asm_info ppx asm_info =
  StrMap.iter (fun _ asm_sub -> pp_asm_sub ppx asm_sub) asm_info

(** Number of bytes requested from the kernel by each [Unix.read] call. *)
let read_chunk_size = 4096

(** Reads several file descriptors concurrently until each of them reaches
    end-of-file, and returns their contents in the same order as [fds].

    The descriptors are multiplexed with [Unix.select], so a producer writing
    to several pipes can never stall because only one of them is being
    drained. Works with blocking and non-blocking descriptors alike.

    @raise Unix.Unix_error if selecting or reading fails for any reason other
    than an interrupted or would-block call. *)
let read_all_fds fds =
  let chunk = Bytes.create read_chunk_size in
  let slots = List.map (fun fd -> (fd, Buffer.create read_chunk_size)) fds in
  (* Performs one read on a ready descriptor; [false] means end-of-file. *)
  let pump (fd, sink) =
    match Unix.read fd chunk 0 read_chunk_size with
    | 0 -> false
    | n -> Buffer.add_subbytes sink chunk 0 n; true
    | exception
        Unix.Unix_error ((Unix.EINTR | Unix.EAGAIN | Unix.EWOULDBLOCK), _, _) ->
      (* Nothing was consumed: keep the descriptor and select again. *)
      true
  in
  let rec drain = function
    | [] -> ()
    | live ->
      let ready =
        (* A negative timeout makes select wait indefinitely. *)
        match Unix.select (List.map fst live) [] [] (-1.0) with
        | readable, _, _ -> readable
        | exception Unix.Unix_error (Unix.EINTR, _, _) -> []
      in
      let still_open ((fd, _) as slot) =
        if List.mem fd ready then pump slot else true
      in
      drain (List.filter still_open live)
  in
  drain slots ;
  List.map (fun (_, sink) -> Buffer.to_bytes sink) slots

(** Reads the whole content of a Unix file descriptor, returning it as a Bytes
    sequence.

    Reading only stops at end-of-file: a short read, or a would-block error on
    a non-blocking descriptor, is not mistaken for the end of the stream. For
    a pipe this means the call returns once every write end has been closed.
    The blocking mode of [fd] is left untouched.

    @raise Unix.Unix_error if reading fails. *)
let read_all_fd fd =
  match read_all_fds [fd] with
  | [content] -> content
  | _ -> assert false (* read_all_fds returns one result per descriptor *)


(** Raised when the call to objdump failed. An exception
    [ObjdumpFailed (status_code, stderr)] will contain the status code of the
    objdump process, and its stderr contents as a string. *)
exception ObjdumpFailed of int * string

(** Closes [fd], ignoring failures; only used on cleanup paths where another
    exception is already being propagated. *)
let close_quietly fd =
  try Unix.close fd with Unix.Unix_error _ -> ()

(** [Unix.waitpid] on [pid], restarted when interrupted by a signal. *)
let rec wait_for_process pid =
  match Unix.waitpid [] pid with
  | reaped -> reaped
  | exception Unix.Unix_error (Unix.EINTR, _, _) -> wait_for_process pid

(** Prints the exit code of a terminated process, or `Not exited` when it did
    not terminate through a regular exit. *)
let pp_process_status ppx = function
  | Unix.WEXITED ex_code -> Format.fprintf ppx "%d" ex_code
  | Unix.WSIGNALED _ | Unix.WSTOPPED _ -> Format.fprintf ppx "Not exited"


(** Runs `objdump -d prog_path` and returns its stdout as a string.

    The output pipes are drained before the process is waited for: waiting
    first deadlocks as soon as the disassembly is larger than the pipe
    capacity, because objdump then blocks on its write while this process
    blocks in the wait. Progress messages are printed on stderr.

    @raise ObjdumpFailed if objdump exits with a non-zero code (the code and
    the captured stderr are carried), or does not exit normally (code [-1]).
    @raise Failure if waiting on the process returns an unexpected pid.
    @raise Unix.Unix_error if pipes or the process cannot be set up. *)
let get_objdump prog_path =
  (* Setup process *)
  let stdin_read, stdin_write = Unix.pipe () in
  let stdout_read, stdout_write = Unix.pipe () in
  let stderr_read, stderr_write = Unix.pipe () in
  let child_ends = [stdin_read; stdout_write; stderr_write] in
  let parent_ends = [stdin_write; stdout_read; stderr_read] in
  (* The child must not inherit our ends of the pipes, otherwise it would keep
     its own stdin open and leak the two read ends. *)
  List.iter Unix.set_close_on_exec parent_ends ;

  (* Run the process *)
  Format.eprintf "Running objdump...@." ;
  let objdump_pid =
    try
      Unix.create_process
        "objdump"
        [| "objdump" ; "-d" ; prog_path |]
        stdin_read stdout_write stderr_write
    with exn ->
      List.iter close_quietly (child_ends @ parent_ends) ;
      raise exn
  in
  (* The child owns duplicates of these; while we keep ours open, the read
     ends can never report end-of-file. *)
  List.iter Unix.close child_ends ;
  (* Close stdin already: we won't send anything and make it clear *)
  Unix.close stdin_write ;
  Format.eprintf "\tPID = %d@." objdump_pid ;

  (* Extract stdout, stderr data; both at once so that neither pipe can fill
     up while the other one is being read *)
  let outputs =
    try read_all_fds [stdout_read; stderr_read]
    with exn ->
      close_quietly stdout_read ;
      close_quietly stderr_read ;
      raise exn
  in
  let stdout_content, stderr_content =
    match outputs with
    | [out_bytes; err_bytes] -> (out_bytes, err_bytes)
    | _ -> assert false (* read_all_fds returns one result per descriptor *)
  in

  (* Properly close pipes *)
  Unix.close stdout_read ;
  Unix.close stderr_read ;

  (* Both pipes hit end-of-file, so the process is done writing: reap it *)
  let pid, status = wait_for_process objdump_pid in
  if pid <> objdump_pid then
    raise (Failure ("Could not properly wait on objdump")) ;

  Format.eprintf "Run objdump: done. Status: %a@." pp_process_status status ;

  (* Check status code *)
  match status with
  | Unix.WEXITED 0 ->
    Bytes.to_string stdout_content
  | Unix.WEXITED err_code ->
    raise (ObjdumpFailed (err_code, (Bytes.to_string stderr_content)))
  | Unix.WSIGNALED _ | Unix.WSTOPPED _ ->
    raise (ObjdumpFailed (-1, (Bytes.to_string stderr_content)))

(** Interprets `objdump -d` output and yield a list of functions, alongside
    with their addresses, symbol names, asm, …

    Not implemented yet: the argument is ignored and every call fails with
    [Assert_failure]. *)
let interpret_objdump objdump_out : asm_info_t =
  (* TODO *)
  ignore objdump_out;
  assert false