#!/usr/bin/env python3 """ Compare the sizes of the .eh_frame section in the original binary and of the .text in the generated .eh_elf.so. """ import argparse import os import subprocess from collections import namedtuple from shared_python import elf_so_deps, readlink_rec, DEFAULT_AUX_DIRS ''' An ELF object, including the path to the ELF itself, and the path to its matching eh_elf ''' ElfObject = namedtuple('ElfObject', 'elf eh_elf') def format_size(size): ''' Format a size to a human-readable string ''' units = ['B', 'KiB', 'MiB', 'GiB'] # We'll never go over that cur_unit = 0 while cur_unit < len(units) and size >= 1024: size /= 1024 cur_unit += 1 return '{:.1f} {}'.format(size, units[cur_unit]) def invoke_objdump_headers(elf_loc): ''' Call objdump -h, returning the list of lines outputted ''' if not os.path.isfile(elf_loc): raise FileNotFoundError try: objdump_out = subprocess.check_output(['objdump', '-h', elf_loc]) \ .decode('utf-8') except subprocess.CalledProcessError as exn: raise Exception(("Cannot run objdump on {}: objdump " "terminated with exit code {}.").format( elf_loc, exn.returncode)) return objdump_out.split('\n') def get_elf_sections(elf_loc): ''' List the ELF sections of the given ELF ''' sections = {} for line in invoke_objdump_headers(elf_loc): line = line.strip() if not line or not '0' <= line[0] <= '9': # not a section line continue spl = line.split() sections[spl[1]] = { 'name': spl[1], 'size': int(spl[2], 0x10), } return sections def matching_eh_elf(eh_locs, elf_name): ''' Get the .eh_elf.so file matching elf_name in the list of directories eh_locs. Raises FileNotFoundError if there is no such file ''' basename = os.path.basename(elf_name) + '.eh_elf.so' for prefix in eh_locs: eh_elf_path = os.path.join(prefix, basename) if os.path.isfile(eh_elf_path): return eh_elf_path raise FileNotFoundError("No such file {}".format(basename)) def objects_list(args): ''' Get the list of elf objects to process ''' out = [] eh_elfs_dirs = ( args.eh_elfs + ([] if args.no_dft_aux else DEFAULT_AUX_DIRS) ) if args.deps: objects = set(args.object) for obj in args.object: objects = objects.union(elf_so_deps(obj)) objects = list(objects) objects.sort() else: objects = args.object objects = list(map(readlink_rec, objects)) for obj in objects: out.append(ElfObject(obj, matching_eh_elf(eh_elfs_dirs, obj))) return out def process_args(): ''' Process `sys.argv` arguments ''' parser = argparse.ArgumentParser( description=("Compare the sizes of the .eh_frame section in the " "original binary and of the .text in the generated " ".eh_elf.so."), ) parser.add_argument('--deps', action='store_true', help=("Also compare the shared objects this object " "depends on")) parser.add_argument('--eh-elfs', required=True, action='append', help=("Indicate the directory in which eh_elfs are " "located")) parser.add_argument('-A', '--no-dft-aux', action='store_true', help=("Do not use the default eh_elf locations")) parser.add_argument('object', nargs='+', help="The ELF object(s) to process") return parser.parse_args() def get_or_default(obj, field, default=None): ''' Access a field of a subscriptable, returning a default if there is no such field ''' if field not in obj: return default return obj[field] def main(): args = process_args() objs = objects_list(args) col_names = [ 'Shared object', 'Orig prog size', 'Orig eh_frame', 'Gen eh_elf .text', '+ .rodata', '% of prog size', 'Growth', ] col_len = [] displayed_name_filter = lambda x: os.path.basename(x.elf) max_elf_name = max(map(lambda x: len(displayed_name_filter(x)), objs)) col_len.append(max(max_elf_name, len(col_names[0]))) for i in range(1, len(col_names)): col_len.append(len(col_names[i]) + 1) col_len = list(map(str, col_len)) header_format = ('{:<' + col_len[0] + '} ' '{:<' + col_len[1] + '} ' '{:<' + col_len[2] + '} ' '{:<' + col_len[3] + '} ' '{:<' + col_len[4] + '} ' '{:<' + col_len[5] + '} ' '{:<' + col_len[6] + '}') row_format = ('{:>' + col_len[0] + '} ' '{:>' + col_len[1] + '} ' '{:>' + col_len[2] + '} ' '{:>' + col_len[3] + '} ' '{:>' + col_len[4] + '} ' '{:>' + col_len[5] + '} ' '{:>' + col_len[6] + '}') print(header_format.format( col_names[0], col_names[1], col_names[2], col_names[3], col_names[4], col_names[5], col_names[6], )) total_program_size = 0 total_eh_frame_size = 0 total_eh_elf_text_size = 0 total_eh_elf_size = 0 for obj in objs: elf_sections = get_elf_sections(obj.elf) eh_elf_sections = get_elf_sections(obj.eh_elf) text_size = get_or_default( elf_sections, '.text', {'size': 0})['size'] rodata_size = get_or_default( elf_sections, '.rodata', {'size': 0})['size'] eh_frame_size = get_or_default( elf_sections, '.eh_frame', {'size': 0})['size'] eh_elf_text_size = get_or_default( eh_elf_sections, '.text', {'size': 0})['size'] eh_elf_size = eh_elf_text_size + \ get_or_default( eh_elf_sections, '.rodata', {'size': 0})['size'] program_size = text_size + rodata_size total_program_size += program_size total_eh_frame_size += eh_frame_size total_eh_elf_text_size += eh_elf_text_size total_eh_elf_size += eh_elf_size print(row_format.format( displayed_name_filter(obj), format_size(program_size), format_size(eh_frame_size), format_size(eh_elf_text_size), format_size(eh_elf_size), '{:.2f}'.format(eh_elf_size / program_size * 100), '{:.2f}'.format(eh_elf_size / eh_frame_size))) # Checking for missed big sections for section in eh_elf_sections: if section == '.text' or section == '.rodata': continue if eh_elf_sections[section]['size'] > eh_elf_size / 2: print("\t\t/!\\ Section {} is big ({}) in the eh_elf".format( section, format_size(eh_elf_sections[section]['size']))) print(row_format.format( 'Total', format_size(total_program_size), format_size(total_eh_frame_size), format_size(total_eh_elf_size), format_size(total_eh_elf_text_size), '{:.2f}'.format(total_eh_elf_size / total_program_size * 100), '{:.2f}'.format(total_eh_elf_size / total_eh_frame_size))) if __name__ == '__main__': main()