dwarf-assembly/generate_eh_elf.py

439 lines
13 KiB
Python
Raw Normal View History

2018-04-25 17:33:59 +02:00
#!/usr/bin/env python3
"""
Generates the `.eh_frame` equivalent in C code of the given ELF file, and
all the shared objects it depends upon.
"""
import os
import sys
import subprocess
import tempfile
import argparse
from enum import Enum
2018-05-09 17:55:57 +02:00
2019-07-16 11:18:06 +02:00
from shared_python import (
elf_so_deps,
do_remote,
is_newer,
to_eh_elf_path,
find_eh_elf_dir,
DEFAULT_AUX_DIRS,
)
from extract_pc import generate_pc_list
2018-04-25 17:33:59 +02:00
# Path to the `dwarf-assembly` binary, looked up in the directory that holds
# the script being run (as given by `sys.argv[0]`).
DWARF_ASSEMBLY_BIN = os.path.join(
    os.path.dirname(os.path.abspath(sys.argv[0])),
    "dwarf-assembly",
)

# C compiler to invoke: the value of the `C` environment variable if it is
# set, `gcc` otherwise.
# NOTE(review): the variable read here is `C`, not the more usual `CC` --
# confirm this is intended.
C_BIN = os.environ.get("C", "gcc")
2018-04-25 17:33:59 +02:00
class SwitchGenPolicy(Enum):
    """Switch generation policies that can be requested from dwarf-assembly.

    The value of each member is the command-line flag that selects the
    policy; it is passed verbatim to the `dwarf-assembly` binary.
    """

    # Selected by the `--switch-per-func` command-line option.
    SWITCH_PER_FUNC = "--switch-per-func"

    # Selected by the `--global-switch` command-line option; also the
    # default policy of `Config`.
    GLOBAL_SWITCH = "--global-switch"
class Config:
    """Settings of one generator run, as gathered from the command line or
    from `gen_eh_elfs`."""

    # Auxiliary directories appended to the user-provided ones unless
    # `no_dft_aux` is set.
    default_aux = DEFAULT_AUX_DIRS

    def __init__(
        self,
        output,
        aux,
        no_dft_aux,
        objects,
        sw_gen_policy=SwitchGenPolicy.GLOBAL_SWITCH,
        force=False,
        use_pc_list=False,
        c_opt_level="3",
        enable_deref_arg=False,
        keep_holes=False,
        cc_debug=False,
        remote=None,
    ):
        # No explicit output directory means the current working directory.
        self.output = output if output is not None else "."

        # User-provided aux directories come first, defaults (if any) last.
        fallback_aux = [] if no_dft_aux else self.default_aux
        self.aux = aux + fallback_aux

        self.objects = objects
        self.sw_gen_policy = sw_gen_policy
        self.force = force
        self.use_pc_list = use_pc_list
        self.c_opt_level = c_opt_level
        self.enable_deref_arg = enable_deref_arg
        self.keep_holes = keep_holes
        self.cc_debug = cc_debug
        self.remote = remote

    @staticmethod
    def default_aux_str():
        """ The default auxiliary directories, as a comma-separated string """
        return ", ".join(Config.default_aux)

    def dwarf_assembly_args(self):
        """ Build the list of flags handed to `dwarf_assembly` """
        optional_flags = (
            (self.enable_deref_arg, "--enable-deref-arg"),
            (self.keep_holes, "--keep-holes"),
        )
        # The switch policy flag always comes first, then the enabled extras.
        return [self.sw_gen_policy.value] + [
            flag for enabled, flag in optional_flags if enabled
        ]

    def cc_opts(self):
        """ Build the list of options handed to the C compiler """
        debug_flags = ["-g"] if self.cc_debug else []
        return ["-fPIC"] + debug_flags + [self.opt_level()]

    def opt_level(self):
        """ The `-O<level>` flag matching the configured optimization level """
        return "-O" + str(self.c_opt_level)

    def aux_dirs(self):
        """ The auxiliary directories to consider for this run """
        return self.aux
def gen_dw_asm_c(obj_path, out_path, config, pc_list_path=None):
    """ Generate the C code produced by dwarf-assembly from `obj_path`, saving
    it as `out_path`.

    `config` provides the dwarf-assembly flags (`config.dwarf_assembly_args()`).
    When `pc_list_path` is not None, it is forwarded through `--pc-list`.

    Raises an `Exception` if dwarf-assembly exits with a non-zero status; in
    that case `out_path` is left untouched. """

    command_args = [DWARF_ASSEMBLY_BIN, obj_path] + config.dwarf_assembly_args()
    if pc_list_path is not None:
        command_args += ["--pc-list", pc_list_path]

    # Run the generator *before* opening `out_path`, so that a failure does not
    # leave an empty or truncated C file behind.
    # TODO enhance error handling
    try:
        dw_asm_output = subprocess.check_output(command_args).decode("utf-8")
    except subprocess.CalledProcessError as exn:
        raise Exception(
            (
                "Cannot generate C code from object file {} using {}: process "
                "terminated with exit code {}."
            ).format(obj_path, DWARF_ASSEMBLY_BIN, exn.returncode)
        ) from exn

    # The output was decoded as UTF-8: write it back with the same encoding
    # rather than relying on the locale's default.
    with open(out_path, "w", encoding="utf-8") as out_handle:
        out_handle.write(dw_asm_output)
def resolve_symlink_chain(objpath):
    """ Resolves a symlink chain. This returns a pair `(new_obj, chain)`,
    `new_obj` being the path reached once `objpath` is no longer a symlink,
    and `chain` being a list representing the path followed, eg.
    `[(objpath, a), (a, b), (b, new_obj)]`.

    Relative link targets are interpreted relatively to the directory of the
    link itself; the resulting paths are not normalized.

    The goal of this function is to allow reproducing symlink architectures at
    the eh_elf level.

    Raises `OSError` (errno `ELOOP`) when more than 40 links have to be
    followed, which is what happens with a cyclic chain. """

    import errno  # only needed to report a symlink loop

    # Bound on the number of links followed, so that a symlink loop raises
    # instead of iterating over and over. 40 is the limit applied by Linux
    # when resolving a path.
    max_hops = 40

    chain = []
    current = objpath
    while os.path.islink(current):
        if len(chain) >= max_hops:
            raise OSError(errno.ELOOP, os.strerror(errno.ELOOP), objpath)

        target = os.readlink(current)
        if not os.path.isabs(target):
            # A relative target is relative to the link's own directory
            target = os.path.join(os.path.dirname(current), target)

        chain.append((current, target))
        current = target

    return (current, chain)
2018-06-20 14:12:11 +02:00
def find_out_dir(obj_path, config):
    """ Pick the directory that will hold the eh_elf built from `obj_path`.

    The choice is delegated to `find_eh_elf_dir`, which is given the
    configured auxiliary directories and the configured output directory. """

    candidate_dirs = config.aux_dirs()
    output_dir = config.output
    return find_eh_elf_dir(obj_path, candidate_dirs, output_dir)
2018-06-20 14:12:11 +02:00
def gen_eh_elf(obj_path, config):
    """ Generate the eh_elf corresponding to `obj_path`.

    The output directory is chosen by `find_out_dir`, and the output file
    names are computed by `to_eh_elf_path`. If `obj_path` is a symlink, the
    eh_elf is generated for the file it eventually points to, and the symlink
    chain is reproduced among the generated eh_elfs.

    Nothing is done when the existing eh_elf is reported up to date by
    `is_newer`, unless `config.force` is set.

    Raises an `Exception` if the output path exists and is not a directory, if
    a compilation step fails, or if a symlink would have to overwrite a
    regular file. """

    out_dir = find_out_dir(obj_path, config)
    obj_path, link_chain = resolve_symlink_chain(obj_path)

    print("> {}...".format(os.path.basename(obj_path)))

    out_base_name = to_eh_elf_path(obj_path, out_dir, base=True)
    out_so_path = to_eh_elf_path(obj_path, out_dir, base=False)
    pc_list_dir = os.path.join(out_dir, "pc_list")

    if is_newer(out_so_path, obj_path) and not config.force:
        return  # The object is recent enough, no need to recreate it

    if os.path.exists(out_dir) and not os.path.isdir(out_dir):
        raise Exception("The output path {} is not a directory.".format(out_dir))
    os.makedirs(out_dir, exist_ok=True)

    with tempfile.TemporaryDirectory() as compile_dir:
        # Generate PC list
        pc_list_path = None
        if config.use_pc_list:
            pc_list_path = os.path.join(pc_list_dir, out_base_name + ".pc_list")
            os.makedirs(pc_list_dir, exist_ok=True)
            print("\tGenerating PC list…")
            generate_pc_list(obj_path, pc_list_path)

        # Generate the C source file
        print("\tGenerating C…")
        c_path = os.path.join(compile_dir, (out_base_name + ".c"))
        gen_dw_asm_c(obj_path, c_path, config, pc_list_path)

        # Compile it into a .o
        print("\tCompiling into .o…")
        o_path = os.path.join(compile_dir, (out_base_name + ".o"))
        if config.remote:
            # The remote side only sees bare file names: the source file is
            # sent over, and the object file is retrieved as `o_path`.
            remote_out = do_remote(
                config.remote,
                [C_BIN, "-o", out_base_name + ".o", "-c", out_base_name + ".c"]
                + config.cc_opts(),
                send_files=[c_path],
                retr_files=[(out_base_name + ".o", o_path)],
            )
            call_rc = 1 if remote_out is None else 0
        else:
            # Use the same compiler options as the remote build, so that
            # `cc_debug` (-g) is honoured locally as well.
            call_rc = subprocess.call(
                [C_BIN, "-o", o_path, "-c", c_path] + config.cc_opts()
            )
        if call_rc != 0:
            raise Exception("Failed to compile to a .o file")

        # Compile it into a .so
        print("\tCompiling into .so…")
        call_rc = subprocess.call([C_BIN, "-o", out_so_path, "-shared", o_path])
        if call_rc != 0:
            raise Exception("Failed to compile to a .so file")

        # Re-create symlinks: each link of the original chain is mirrored by a
        # link between the corresponding eh_elfs, targeting a bare file name.
        eh_elf_links = [
            (
                to_eh_elf_path(link_src, out_dir),
                os.path.basename(to_eh_elf_path(link_dst, out_dir)),
            )
            for link_src, link_dst in link_chain
        ]
        for link_path, link_target in eh_elf_links:
            # `lexists` also reports dangling symlinks, which `exists` would
            # miss -- making `os.symlink` fail on the leftover link.
            if os.path.lexists(link_path):
                if not os.path.islink(link_path):
                    raise Exception(
                        "{}: file already exists and is not a symlink.".format(
                            link_path
                        )
                    )
                os.remove(link_path)
            os.symlink(link_target, link_path)
2018-04-25 17:33:59 +02:00
def gen_all_eh_elf(obj_path, config):
    """ Run `gen_eh_elf` on every dependency of `obj_path` (as listed by
    `elf_so_deps`), then on `obj_path` itself """

    # Dependencies first, the object itself last
    targets = list(elf_so_deps(obj_path)) + [obj_path]
    for target in targets:
        gen_eh_elf(target, config)
2019-07-16 11:18:06 +02:00
def gen_eh_elfs(obj_path, out_dir, global_switch=True, deps=True, remote=None):
    """ Call gen{_all,}_eh_elf with args setup accordingly with the given
    options.

    `obj_path` is the path of a single ELF object and `out_dir` the output
    directory (the current working directory if None). `global_switch`
    selects the `GLOBAL_SWITCH` policy when true, `SWITCH_PER_FUNC` otherwise.
    When `deps` is true, the shared objects `obj_path` depends on are
    processed as well. `remote` is forwarded to the `Config` as is. """

    if global_switch:
        switch_gen_policy = SwitchGenPolicy.GLOBAL_SWITCH
    else:
        switch_gen_policy = SwitchGenPolicy.SWITCH_PER_FUNC

    config = Config(
        out_dir, [], False, [obj_path], sw_gen_policy=switch_gen_policy, remote=remote
    )

    # Both generators take a single path, not a list of paths: pass
    # `obj_path` itself, exactly like `main` does.
    if deps:
        return gen_all_eh_elf(obj_path, config)
    return gen_eh_elf(obj_path, config)
2018-04-25 17:33:59 +02:00
def process_args():
    """ Build the command-line parser and parse `sys.argv` with it.

    Returns the namespace produced by `ArgumentParser.parse_args()`. """

    default_aux = Config.default_aux_str()

    parser = argparse.ArgumentParser(
        description="Compile ELFs into their related eh_elfs"
    )

    # `gen_func` holds the function called on each object by `main`
    parser.add_argument(
        "--deps",
        dest="gen_func",
        action="store_const",
        const=gen_all_eh_elf,
        default=gen_eh_elf,
        help="Also generate eh_elfs for the shared objects this object depends on",
    )

    # Output locations
    parser.add_argument(
        "-o",
        "--output",
        metavar="path",
        help=(
            "Save the generated objects at the given path instead of the "
            "current working directory"
        ),
    )
    parser.add_argument(
        "-a",
        "--aux",
        action="append",
        default=[],
        help=(
            "Alternative output directories. These directories are searched "
            "for existing matching eh_elfs, and if found, these files are "
            "updated instead of creating new files in the --output directory. "
            "By default, some aux directories are always considered, unless "
            "-A is passed: {}."
        ).format(default_aux),
    )
    parser.add_argument(
        "-A",
        "--no-dft-aux",
        action="store_true",
        help="Do not use the default auxiliary output directories: {}.".format(
            default_aux
        ),
    )

    # Generation behaviour
    parser.add_argument(
        "--remote",
        metavar="ssh_args",
        help=(
            "Execute the heavyweight commands on the remote machine, using "
            "`ssh ssh_args`."
        ),
    )
    parser.add_argument(
        "--use-pc-list",
        action="store_true",
        help=(
            "Generate a PC list using `extract_pc.py` for each processed ELF "
            "file, and call dwarf-assembly accordingly."
        ),
    )
    parser.add_argument(
        "--force",
        "-f",
        action="store_true",
        help=(
            "Force re-generation of the output files, even when those files "
            "are newer than the target ELF."
        ),
    )
    parser.add_argument(
        "--enable-deref-arg",
        action="store_true",
        help=(
            "Pass the `--enable-deref-arg` to dwarf-assembly, enabling an "
            "extra `deref` argument for each lookup function, allowing to "
            "work on remote address spaces."
        ),
    )
    parser.add_argument(
        "--keep-holes",
        action="store_true",
        help=(
            "Keep holes between FDEs instead of filling them with junk. "
            "More accurate, less compact."
        ),
    )
    parser.add_argument(
        "-g",
        "--cc-debug",
        action="store_true",
        help="Compile the source file with -g for easy debugging",
    )

    # c_opt_level: one mutually exclusive `-O<level>` flag per level
    opt_level_grp = parser.add_mutually_exclusive_group()
    for level in ("0", "1", "2", "3", "s"):
        opt_level_grp.add_argument(
            "-O" + level,
            action="store_const",
            const=level,
            dest="c_opt_level",
            help="Compile C file with this optimization level.",
        )
    opt_level_grp.set_defaults(c_opt_level="3")

    # Switch generation policy: exactly one of the two flags is required
    switch_gen_policy = parser.add_mutually_exclusive_group(required=True)
    policy_flags = (
        ("--switch-per-func", SwitchGenPolicy.SWITCH_PER_FUNC),
        ("--global-switch", SwitchGenPolicy.GLOBAL_SWITCH),
    )
    for flag, policy in policy_flags:
        switch_gen_policy.add_argument(
            flag,
            dest="sw_gen_policy",
            action="store_const",
            const=policy,
            help="Passed to dwarf-assembly.",
        )

    parser.add_argument("object", nargs="+", help="The ELF object(s) to process")

    return parser.parse_args()
def main():
    """ Parse the command line, build the matching `Config`, and run the
    selected generation function on each requested object """

    args = process_args()

    # Options whose name is the same on the command line and in `Config`
    forwarded = (
        "output",
        "aux",
        "no_dft_aux",
        "sw_gen_policy",
        "force",
        "use_pc_list",
        "c_opt_level",
        "enable_deref_arg",
        "keep_holes",
        "cc_debug",
        "remote",
    )
    settings = {name: getattr(args, name) for name in forwarded}
    config = Config(objects=args.object, **settings)

    for obj in args.object:
        args.gen_func(obj, config)


if __name__ == "__main__":
    main()