Compare commits

...

4 commits

8 changed files with 401 additions and 50 deletions

5
dune
View file

@ -1,4 +1,5 @@
(include_subdirs unqualified)
(executable
(name test_objdump)
(libraries linksem))
(name elf_arrows)
(libraries linksem jingoo))

View file

@ -3,26 +3,10 @@
Uses `objdump -d` internally, and parses the output
*)
(** A memory address *)
type addr_t = int
(** A single asm instruction *)
type asm_instr_t = {
instr_addr: addr_t; (** Memory location of this instruction *)
instr_bytes: Bytes.t; (** Binary representation of the instruction *)
instr_asm: string; (** Asm for the instruction (eg `movq …`) *)
}
type asm_t = asm_instr_t list
type asm_sub_t = {
sub_section: string;
sub_name: string;
sub_addr: addr_t;
sub_asm: asm_t;
}
type asm_info_t = asm_sub_t list
exception ParseError of string
module AsmTypes = Asm_info.NoAnnot
(** Pretty printers *)
let pp_hex_bytes ppx bytes_array =
(* Number of lone spaces to be printed after the bytes. Objdump prints 21. *)
@ -32,19 +16,23 @@ let pp_hex_bytes ppx bytes_array =
Format.fprintf ppx "%s" (String.make remaining_spaces ' ')
let pp_asm_instr ppx asm_instr =
Format.fprintf ppx " %04x:\t%a\t%s@."
asm_instr.instr_addr
pp_hex_bytes asm_instr.instr_bytes
asm_instr.instr_asm
AsmTypes.(
Format.fprintf ppx " %04x:\t%a\t%s@."
asm_instr.instr_addr
pp_hex_bytes asm_instr.instr_bytes
asm_instr.instr_asm
)
let pp_asm ppx asm_instrs = List.iter (pp_asm_instr ppx) asm_instrs
let pp_asm_sub ppx asm_sub =
Format.fprintf ppx "%016x <%s> {%s}:@.@[%a@]@."
asm_sub.sub_addr
asm_sub.sub_name
asm_sub.sub_section
pp_asm asm_sub.sub_asm
AsmTypes.(
Format.fprintf ppx "%016x <%s> {%s}:@.@[%a@]@."
asm_sub.sub_addr
asm_sub.sub_name
asm_sub.sub_section
pp_asm asm_sub.sub_asm
)
let pp_asm_info ppx asm_info =
List.iter (fun asm_sub -> Format.fprintf ppx "%a" pp_asm_sub asm_sub)
@ -131,12 +119,14 @@ let get_objdump prog_path =
None if the previous subroutine was committed;
- the current subroutine's asm lines
*)
type interpret_objdump_accu =
ObjdumpAccu of asm_info_t * string * asm_sub_t option * asm_t
ObjdumpAccu of AsmTypes.asm_info_t * string * AsmTypes.asm_sub_t option *
AsmTypes.asm_t
(** Interprets `objdump -d` output and yield a list of functions, alongside
with their addresses, symbol names, asm, *)
let interpret_objdump objdump_out : asm_info_t =
let interpret_objdump objdump_out : AsmTypes.asm_info_t =
(* Reads a string of bytes formatted like "01 23 ae 3f" and output a Bytes.t
object *)
@ -201,11 +191,12 @@ let interpret_objdump objdump_out : asm_info_t =
(Format.sprintf "Invalid subroutine line: \"%s\""
cur_line))
) in
let line = {
let line = AsmTypes.({
instr_addr = addr;
instr_bytes = bytes_repr;
instr_asm = asm_repr
} in
instr_asm = asm_repr;
instr_annot = ();
}) in
(match objdump_accu with
| ObjdumpAccu(cur_info, cur_section, Some in_flight, asm) ->
ObjdumpAccu(cur_info, cur_section, Some in_flight, line::asm)

28
src/asm_info.ml Normal file
View file

@ -0,0 +1,28 @@
module type Annot_type = sig
type instr_annot_t
end
module S (Annot: Annot_type) = struct
(** A memory address *)
type addr_t = int
module AddrMap = Map.Make(struct type t = addr_t let compare = compare end)
(** A single asm instruction *)
type asm_instr_t = {
instr_addr: addr_t; (** Memory location of this instruction *)
instr_bytes: Bytes.t; (** Binary representation of the instruction *)
instr_asm: string; (** Asm for the instruction (eg `movq …`) *)
instr_annot: Annot.instr_annot_t; (** User-defined annotation *)
}
type asm_t = asm_instr_t list
type asm_sub_t = {
sub_section: string;
sub_name: string;
sub_addr: addr_t;
sub_asm: asm_t;
}
type asm_info_t = asm_sub_t list
end
module NoAnnot = S(struct type instr_annot_t = unit end)

View file

@ -12,7 +12,9 @@ let make_pp linksem_pp =
|> Format.fprintf fmt "%s")
type elf_path = string
type elf_handle = ElfHandle of elf_path * Elf_file.elf_file * Dwarf.dwarf_static
type elf_handle =
ElfHandle of elf_path * Elf_file.elf_file * Dwarf.dwarf_static
* Asm_info.NoAnnot.asm_info_t
(** Open an ELF file and loads its dwarf_static infos. *)
let open_elf elf_path =
@ -25,12 +27,36 @@ let open_elf elf_path =
| Error.Fail _ -> raise NotElf)
))
>>= fun elf_file -> (
let asm_info = Asm_acquire.acquire_asm elf_path in
let static_info = (match Dwarf.extract_dwarf_static elf_file with
| Some static_info -> static_info
| None -> raise NoDwarfStatic) in
Error.Success (ElfHandle (elf_path, elf_file, static_info)))
Error.Success (ElfHandle (elf_path, elf_file, static_info, asm_info)))
(** Add boxes according to DWARF info *)
let add_line_boxes render_data dwarf_lines =
(* List.fold_left with two elements in scope at once. *)
let fold_ahead folder base lst =
let rec do_fold accu = function
| [] | _::[] -> accu
| hd1::hd2::tl -> do_fold (folder accu hd1 hd2) (hd2::tl)
in
do_fold base lst
in
List.fold_left (fun cur_render_data (_, reg_list) ->
fold_ahead (fun cur_render_data cur_reg reg_ahead ->
let box_start = Z.to_int @@ cur_reg.Dwarf.lnr_address in
let box_end = Z.to_int @@ reg_ahead.Dwarf.lnr_address in
Format.eprintf "Add box %x -- %x@." box_start box_end ;
let n_render_data, _ = Renderer.add_box_excl
cur_render_data (box_start, box_end) in
n_render_data)
cur_render_data reg_list)
render_data
dwarf_lines
let elf_files = ref []
let speclist = []
let parse_anon_arg arg =
@ -46,10 +72,18 @@ let _ =
!elf_files
in
List.iter (fun (ElfHandle(path, _, static_info)) ->
let line_info = static_info.ds_evaluated_line_info in
Format.eprintf "Line infos <%s>:@. %a@."
path
(make_pp Dwarf.pp_evaluated_line_info)
line_info ;
) elf_handles
List.iter (fun (ElfHandle(path, _, static_info, _)) ->
let line_info = static_info.ds_evaluated_line_info in
Format.eprintf "Line infos <%s>:@. %a@."
path
(make_pp Dwarf.pp_evaluated_line_info)
line_info ;
) elf_handles ;
List.iter (fun (ElfHandle(_, _, static_info, asm)) ->
let render_data = Renderer.init_render_data asm in
let line_info = static_info.ds_evaluated_line_info in
let boxed_render_data = add_line_boxes render_data line_info in
Format.printf "%s@."
(Renderer.to_string Html_renderer.render boxed_render_data)
) elf_handles;

46
src/html_renderer.ml Normal file
View file

@ -0,0 +1,46 @@
(** HTML Renderer
Renders the output of computations on a given program as an HTML webpage.
*)
(** Box `AnnotAsm.asm_info_t` as a Jingoo tvalue *)
let render_prog_box annotated_prog = Jingoo.Jg_types.(Renderer.AnnotAsm.(
let render_addr addr = (Format.sprintf "%04x" addr) in
List.map (fun render_sub ->
[
("sub_section", box_string render_sub.sub_section);
("sub_name", box_string render_sub.sub_name);
("sub_addr", box_string (Format.sprintf "%016x" render_sub.sub_addr));
("sub_asm", List.map (fun row -> [
("instr_addr", box_string (render_addr row.instr_addr));
("instr_bytes", box_string (
Format.asprintf "%a" Asm_acquire.pp_hex_bytes row.instr_bytes
));
("instr_asm", box_string row.instr_asm);
("instr_events", (List.map (fun event ->
let typ, id, bound = Renderer.(match event with
| BoxStart(id, bound) -> "start", id, bound
| BoxEnd(id, bound) -> "end", id, bound
) in
[
("typ", box_string typ);
("id", box_int id);
("bound", box_string @@ render_addr bound);
] |> box_obj)
row.instr_annot.Renderer.events)
|> box_list);
] |> box_obj) render_sub.sub_asm
|> box_list;
)] |> box_obj
) annotated_prog
|> box_list
))
(** [render render_data] renders the given [render_data] to a string. *)
let render render_data = Jingoo.(
let annotated_prog = Renderer.render_data_to_annotated_asm render_data in
let models = [
("subroutines", render_prog_box annotated_prog);
] in
Jg_template.from_file "src/render_html.jingoo" ~models:models
)

67
src/render_html.jingoo Normal file
View file

@ -0,0 +1,67 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<style>
body {
font-family: "mono";
font-size: 12px;
}
.subroutine {
margin: 20px 0;
}
.sub_body {
margin-left: 4ex;
}
.addr {
color: green;
}
.instr_bytes pre {
display: inline;
font-size: 12px;
font-family: "mono";
}
.instr_bytes {
color: grey;
}
.instr_box {
background-color: #ffffb4;
}
</style>
</head>
<body>
{% for subroutine in subroutines %}
<div class="subroutine">
<div class="sub_headline">
<span class="addr">{{ subroutine.sub_addr }}</span>
<span class="sub_section">[{{ subroutine.sub_section }}]</span>
<span class="sub_name">&lt;{{ subroutine.sub_name }}&gt;</span>:
</div>
<div class="sub_body">
{% for row in subroutine.sub_asm %}
{% for event in row.instr_events %}
{% if event.typ == "start" %}
<div class="instr_box" id="instr_box_{{ event.id }}">
{% endif %}
{% endfor %}
<div class="sub_instr">
<span class="addr instr_addr">{{ row.instr_addr }}</span>
<span class="instr_bytes"><pre>{{ row.instr_bytes }}</pre></span>
<span class="instr_asm">{{ row.instr_asm }}</span>
</div>
{% for event in row.instr_events %}
{% if event.typ == "end" %}
</div> <!-- END instr_box_{{ event.id }} -->
{% endif %}
{% endfor %}
{% endfor %}
</div>
</div>
{% endfor %}
</body>
</html>

182
src/renderer.ml Normal file
View file

@ -0,0 +1,182 @@
(** Abstract Renderer
Abstract generic renderer, with tools to ease and unify the output of
computations on a given program in any chosen format.
*)
module RawAsm = Asm_info.NoAnnot
type addr_range_t = RawAsm.addr_t * RawAsm.addr_t
type addr_range_tag_t = int
type tag_addr_range_t = TaggedRange of addr_range_tag_t * addr_range_t
type render_data_t = {
render_prog : RawAsm.asm_info_t ;
render_boxes : tag_addr_range_t list ;
render_prev_address : RawAsm.addr_t RawAsm.AddrMap.t
}
type render_box_event_t =
BoxStart of addr_range_tag_t * RawAsm.addr_t
| BoxEnd of addr_range_tag_t * RawAsm.addr_t
type asm_annot_type = {
events : render_box_event_t list
}
module AnnotAsm = Asm_info.S(struct type instr_annot_t = asm_annot_type end)
(** Make a map mapping addresses to the preceding instruction address appearing
in `prog` *)
let make_prev_address prog =
let _, addr_map =
List.fold_left (fun (prev_addr, cmap) sub ->
List.fold_left (fun (prev_addr, cmap) instr ->
(instr.RawAsm.instr_addr,
RawAsm.AddrMap.add instr.RawAsm.instr_addr prev_addr cmap)
)
(prev_addr, cmap)
sub.RawAsm.sub_asm
)
(0, RawAsm.AddrMap.empty)
prog
in
addr_map
(** Initializes a rendering data structure on the given program *)
let init_render_data prog =
{ render_prog = prog ;
render_boxes = [] ;
render_prev_address = make_prev_address prog ;
}
(** [add_box render_data range] adds a box around an address range, inclusive
of both bounds, in [render_data] and returns the new render data and the
newly inserted element's id *)
let add_box render_data range : render_data_t * int =
let elt_id = (match render_data.render_boxes with
| [] -> 0
| TaggedRange(prev_id, _)::_ -> prev_id + 1) in
{
render_data with
render_boxes = (TaggedRange(elt_id, range)) :: render_data.render_boxes
}, elt_id
(** Same as [add_box], with an included start, excluded end for range. *)
let add_box_excl render_data (addr_beg, addr_end) =
(match RawAsm.AddrMap.find_opt addr_end render_data.render_prev_address with
| None -> Format.eprintf "Box end address %x not found, ignoring box."
addr_end ;
raise Not_found
| Some end_bound -> add_box render_data (addr_beg, end_bound)
)
(** [to_file renderer render_data path] renders the given [render_data] to a
file at [path] using the provided specific renderer *)
let to_file renderer render_data path =
let handle = open_out path in
render_data
|> renderer
|> output_string handle ;
close_out handle
(** [to_string renderer render_data] renders the given [render_data] to a
returned string using the provided specific renderer.
This function is provided for unification and clarity, but is an alias for
[renderer].
*)
let to_string renderer render_data =
renderer render_data
let extract_event_addr box_event = match box_event with
| BoxStart(_, addr) -> addr
| BoxEnd(_, addr) -> addr
(** [boxes_to_events render_data] converts the list of boxes of `render_data`
to a list of events, that is, an address-ordered list of
`render_box_event_t`
*)
let boxes_to_events render_data =
let compare_events e1 e2 =
match (compare (extract_event_addr e1) (extract_event_addr e2)) with
| 0 -> (match e1, e2 with
| BoxEnd(_), BoxStart(_) -> -1
| BoxStart(_), BoxEnd(_) -> 1
| BoxStart(id1, _), BoxStart(id2, _) -> compare id1 id2
| BoxEnd(id1, _), BoxEnd(id2, _) -> compare id1 id2
)
| n -> n
in
let unordered_events = List.fold_left
(fun accu (TaggedRange(elt_id, (range_beg, range_end))) ->
(BoxStart(elt_id, range_beg))
:: (BoxEnd(elt_id, range_end))
:: accu
)
[]
render_data.render_boxes
in
let ordered_events = List.sort
compare_events
unordered_events
in
ordered_events
(** Transforms `render_data` into an `asm_info_t` whose asm lines are annotated
with events, for ease of rendering *)
let render_data_to_annotated_asm render_data : AnnotAsm.asm_info_t =
let render_events = boxes_to_events render_data in
(* Splits `events` into two lists: a list of events whose addresses match
`addr` and the remaining tail list. *)
let extract_current_events events addr =
let reached_stop event = (extract_event_addr event) > addr in
let rec fold_until accu l = match l with
| (hd::_) as cur_l when reached_stop hd -> (List.rev accu), cur_l
| hd::tl -> fold_until (hd::accu) tl
| [] -> (List.rev accu), []
in
fold_until [] events
in
let map_asm render_events lst =
let asm, leftover_events = List.fold_left
(fun (accu, render_events) asm_instr ->
let current_events, future_events =
extract_current_events render_events RawAsm.(asm_instr.instr_addr)
in
(
{
AnnotAsm.instr_addr = RawAsm.(asm_instr.instr_addr) ;
AnnotAsm.instr_bytes = RawAsm.(asm_instr.instr_bytes) ;
AnnotAsm.instr_asm = RawAsm.(asm_instr.instr_asm) ;
AnnotAsm.instr_annot = {
events = current_events
}
}::accu
), future_events
)
([], render_events)
lst
in
(List.rev asm, leftover_events)
in
let annotated_asm, _ =
List.fold_left (fun (accu, render_events) sub ->
let mapped_asm, n_render_events =
map_asm render_events RawAsm.(sub.sub_asm) in
(
{
AnnotAsm.sub_section = RawAsm.(sub.sub_section) ;
AnnotAsm.sub_name = RawAsm.(sub.sub_name) ;
AnnotAsm.sub_addr = RawAsm.(sub.sub_addr) ;
AnnotAsm.sub_asm = mapped_asm ;
} :: accu
), n_render_events
)
([], render_events)
render_data.render_prog
in
List.rev annotated_asm

View file

@ -13,8 +13,11 @@ let read_all handle =
let len = in_channel_length handle in
let buffer = Bytes.create len in
let bytes_read = input handle buffer 0 len in
if bytes_read <> len then
if bytes_read <> len then (
Format.eprintf "Could not read whole file. Read %d, expected %d.@."
bytes_read len ;
raise (Failure "Could not read whole file")
)
else
Bytes.to_string buffer
@ -26,9 +29,8 @@ let _ =
let objdump_out = read_all in_handle in
close_in in_handle ;
let parsed = Asm_acquire.interpret_objdump objdump_out in
Format.eprintf "%a" Asm_acquire.pp_asm_info parsed
(*
List.iter (fun asm_sub ->
Format.eprintf "%a" Asm_acquire.pp_asm_sub asm_sub)
parsed
*)
let render_data = Renderer.init_render_data parsed in
let render_data, box1 = Renderer.add_box render_data (0x1004, 0x100f) in
ignore box1 ;
Format.printf "%s@." (Renderer.to_string Html_renderer.render render_data)