(** Module to acquire the ASM of a given ELF object by its path Uses `objdump -d` internally, and parses the output *) exception ParseError of string module AsmTypes = Asm_info.NoAnnot (** Pretty printers *) let pp_hex_bytes ppx bytes_array = (* Number of lone spaces to be printed after the bytes. Objdump prints 21. *) let remaining_spaces = 21 - 3 * (Bytes.length bytes_array) in Bytes.iter (fun byte -> Format.fprintf ppx "%02x " (int_of_char byte)) bytes_array ; Format.fprintf ppx "%s" (String.make remaining_spaces ' ') let pp_asm_instr ppx asm_instr = AsmTypes.( Format.fprintf ppx " %04x:\t%a\t%s@." asm_instr.instr_addr pp_hex_bytes asm_instr.instr_bytes asm_instr.instr_asm ) let pp_asm ppx asm_instrs = List.iter (pp_asm_instr ppx) asm_instrs let pp_asm_sub ppx asm_sub = AsmTypes.( Format.fprintf ppx "%016x <%s> {%s}:@.@[%a@]@." asm_sub.sub_addr asm_sub.sub_name asm_sub.sub_section pp_asm asm_sub.sub_asm ) let pp_asm_info ppx asm_info = List.iter (fun asm_sub -> Format.fprintf ppx "%a" pp_asm_sub asm_sub) asm_info (** Reads the whole content of a Unix file descriptor, returning it as a Bytes sequence *) let read_all_fd fd = Unix.set_nonblock fd ; let rec do_read accu cur_size = let n_accu = Bytes.extend accu 0 256 in let bytes_read = ( try Unix.read fd n_accu cur_size 256 with | Unix.Unix_error(Unix.EAGAIN, _, _) | Unix.Unix_error(Unix.EWOULDBLOCK, _, _) -> 0 ) in if bytes_read = 256 then do_read n_accu (cur_size + 256) else ( Bytes.extend n_accu 0 (bytes_read - 256) ) in do_read (Bytes.create 0) 0 (** Raised when the call to objdump failed. An exception [ObjdumpFailed (status_code, stderr)] will contain the status code of the objdump process, and its stderr contents as a string. *) exception ObjdumpFailed of int * string (** Runs `objdump -d prog_path` and returns its stdout as a string *) let get_objdump prog_path = (* Setup process *) let stdin_read, stdin = Unix.pipe () in let stdout, stdout_write = Unix.pipe () in let stderr, stderr_write = Unix.pipe () in (* Run the process *) Format.eprintf "Running objdump...@." ; let objdump_pid = Unix.create_process "objdump" [| "objdump" ; "-d" ; prog_path |] stdin_read stdout_write stderr_write in (* Close stdin already: we won't send anything and make it clear *) Unix.close stdin ; Format.eprintf "\tPID = %d@." objdump_pid; let pid, status = Unix.waitpid [] objdump_pid in if pid <> objdump_pid then raise (Failure ("Could not properly wait on objdump")) ; Format.eprintf "Run objdump: done. Status: %a@." (fun ppx sc -> match sc with | Unix.WEXITED ex_code -> Format.fprintf ppx "%d" ex_code | _ -> Format.fprintf ppx "Not exited") status ; (* Extract stdout, stderr data *) let stdout_content = read_all_fd stdout in let stderr_content = read_all_fd stderr in (* Properly close pipes *) Unix.close stdout ; Unix.close stderr ; (* Check status code *) match status with | Unix.WEXITED 0 -> Bytes.to_string stdout_content | Unix.WEXITED err_code -> raise (ObjdumpFailed (err_code, (Bytes.to_string stderr_content))) | _ -> raise (ObjdumpFailed (-1, (Bytes.to_string stderr_content))) (** [ObjdumpAccu cur_info cur_section cur_sub_stub cur_asm] contains: - all the previously encoutered and parsed subroutines in `cur_info`; - the current section name in `cur_section`; - the current subroutine's stub (ie. without asm) in `cur_sub_stub`, or None if the previous subroutine was committed; - the current subroutine's asm lines *) type interpret_objdump_accu = ObjdumpAccu of AsmTypes.asm_info_t * string * AsmTypes.asm_sub_t option * AsmTypes.asm_t (** Interprets `objdump -d` output and yield a list of functions, alongside with their addresses, symbol names, asm, … *) let interpret_objdump objdump_out : AsmTypes.asm_info_t = (* Reads a string of bytes formatted like "01 23 ae 3f" and output a Bytes.t object *) let read_bytes_str bytes_str = let concat_chars chars = String.concat "" (List.map (String.make 1) chars) in let bytes_out = String.split_on_char ' ' bytes_str |> List.map (fun x -> char_of_int @@ int_of_string ("0x" ^ x)) |> concat_chars |> Bytes.of_string in bytes_out in (* Actually aggregates data line by line *) let aggregate_info objdump_accu cur_line = let get_char_opt str pos = (try Some (String.get str pos) with Invalid_argument _ -> None) in (match get_char_opt cur_line 0 with (* Empty string *) | None -> (* Commit current in-flight subroutine if any, reset *) (match objdump_accu with | ObjdumpAccu(_, _, None, []) -> objdump_accu (* Nothing to be done *) | ObjdumpAccu(cur_info, cur_section, Some in_flight, asm) -> (* Commit in-flight *) let full_sub = { in_flight with sub_asm = List.rev asm } in ObjdumpAccu( full_sub::cur_info, cur_section, None, [] ) | ObjdumpAccu(_, _, None, _) -> (* This state should not be reachable. Corrupt state. *) raise (ParseError "Reached invalid state") ) (* Indented line: some asm line in a subroutine *) | Some ' ' -> (* Expected format: " HEX: *\t\(HEX \)+ *\(\t.*\)?$" where the first HEX is the address, the list of HEX (byte by byte) is the binary encoding of the instruction, the last optionnal part is its human-readable counterpart. *) (match String.split_on_char '\t' cur_line with | addr_str :: bytes_str :: tl -> (try let addr = Scanf.sscanf addr_str " %x:" (fun x -> x) in let bytes_repr = read_bytes_str @@ String.trim bytes_str in let asm_repr = (match tl with | [] -> "" | str::[] -> String.trim str | _ -> raise ( ParseError (Format.sprintf "Invalid subroutine line: \"%s\"" cur_line)) ) in let line = AsmTypes.({ instr_addr = addr; instr_bytes = bytes_repr; instr_asm = asm_repr; instr_annot = (); }) in (match objdump_accu with | ObjdumpAccu(cur_info, cur_section, Some in_flight, asm) -> ObjdumpAccu(cur_info, cur_section, Some in_flight, line::asm) | ObjdumpAccu(_, _, None, _) -> (* This state should not be reachable. Corrupt state. *) raise (ParseError "Reached invalid state") ) with Scanf.Scan_failure msg -> raise (ParseError ("Parsing subroutine line: " ^ msg)) ) | _ -> raise (ParseError (Format.sprintf "Invalid subroutine line: \"%s\"" cur_line)) ) (* "Disassembly of section …" line *) | Some 'D' -> let new_section = ( try Scanf.sscanf cur_line "Disassembly of section %s@:" (fun x -> x) with Scanf.Scan_failure msg -> raise (ParseError ("Parsing section boundary line: " ^ msg))) in (match objdump_accu with | ObjdumpAccu(cur_info, _, None, []) -> ObjdumpAccu(cur_info, new_section, None, []) | _ -> (* We should not change section in this state -- corrupt *) raise (ParseError "Reached invalid state") ) (* Other line: it should only be a subroutine header *) | _ -> let sub_address, sub_name = (try Scanf.sscanf cur_line "%016x <%s@>:" (fun addr name -> (addr, name)) with Scanf.Scan_failure msg -> raise (ParseError ("Parsing subroutine header line: " ^ msg)) ) in (match objdump_accu with | ObjdumpAccu(cur_info, cur_section, None, []) -> ObjdumpAccu(cur_info, cur_section, Some { sub_section = cur_section; sub_name = sub_name; sub_addr = sub_address; sub_asm = []; }, []) | _ -> (* We should not change section in this state -- corrupt *) raise (ParseError "Reached invalid state") ) ) in let rec drop_k k lst = match (k, lst) with | 0, lst -> lst | k, (_::tl) -> drop_k (k-1) tl | _, [] -> raise (Invalid_argument "drop_k on empty list") in let result = String.split_on_char '\n' objdump_out |> drop_k 2 (* Two first lines are meaningless for us *) |> List.fold_left aggregate_info (ObjdumpAccu([], "", None, [])) in (match result with | ObjdumpAccu(result, _, None, []) -> result |> List.rev | _ -> raise (ParseError "Invalid end state")) (** Extract asm infos from an ELF file path *) let acquire_asm elf_path = get_objdump elf_path |> interpret_objdump