From 2e656c44d9c9e3cf57e8bafb83f117beb5e1f909 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9ophile=20Bastian?= Date: Fri, 3 Aug 2018 01:04:38 +0200 Subject: [PATCH] Add first results --- report/report.tex | 91 ++++++++++++++++++++++++++- report/tables/compare_sizes_old | 8 +++ report/tables/compare_sizes_old.cmpsz | 6 ++ report/tables/to_latex.py | 30 +++++++++ 4 files changed, 132 insertions(+), 3 deletions(-) create mode 100644 report/tables/compare_sizes_old create mode 100644 report/tables/compare_sizes_old.cmpsz create mode 100644 report/tables/to_latex.py diff --git a/report/report.tex b/report/report.tex index e46e1d2..1f84c8c 100644 --- a/report/report.tex +++ b/report/report.tex @@ -15,6 +15,8 @@ Under supervision of Francesco Zappa-Nardelli\\ \usepackage{mathtools} \usepackage{indentfirst} \usepackage[utf8]{inputenc} +\usepackage{makecell} +\usepackage{booktabs} %\usepackage[backend=biber,style=alphabetic]{biblatex} \usepackage[backend=biber]{biblatex} @@ -26,6 +28,11 @@ Under supervision of Francesco Zappa-Nardelli\\ \addbibresource{../shared/report.bib} +\renewcommand\theadalign{c} +\renewcommand\theadfont{\bfseries} +%\renewcommand\theadgape{\Gape[4pt]} +%\renewcommand\cellgape{\Gape[4pt]} + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{document} @@ -227,7 +234,7 @@ the relevant FDE from its start, until it finds the row it was seeking. 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{DWARF semantics} +\section{DWARF semantics}\label{sec:semantics} We will now define semantics covering most of the operations used for CFI\footnote{To be defined elsewhere in the report} described in the DWARF @@ -507,13 +514,91 @@ and $\semR{\bullet}$ is defined as %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Stack unwinding data compilation} +The tentative approach that was chosen to try to get better unwinding speeds at +a reasonable space loss was to compile directly the \ehframe{} into native +machine code on the x86\_64 platform. + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{Compilation: \ehelfs} -\todo{} + +The rough idea of the compilation is to produce, out of the \ehframe{} section +of a binary, C code that resembles the code shown in the DWARF semantics from +Section~\ref{sec:semantics} above. This C code is then compiled by GCC, +providing for free all the optimisation passes of a modern compiler. This code +is compiled as a shared library, containing a single function, taking as +argument an instruction pointer and a memory context (\ie{} the value of the +various machine registers). An optionally enabled parameter can be used to pass +a function pointer to a dereferencing function, that conceptually does what the +dereferencing \lstc{*} operator on a pointer, and is used to unwind a process +that is not the currently running process, and thus not sharing the same +address space. A call to this function returns a fresh memory context, +containing the values the registers hold after unwinding this frame. + +This generated data is stored in separate shared object files, which we call +\ehelfs. 
It would have been possible to alter the original ELF file to embed +this data as a new section, but getting it to be executed just as any +portion of the \lstc{.text} section would probably have been painful, and +keeping it separated during the experimental phase is quite convenient. It is +possible to have multiple versions of \ehelfs{} files in parallel, with various +options turned on or off, and it doesn't require altering the base system by +editing \eg{} \texttt{/usr/lib/libc-*.so}. Instead, when the \ehelf{} data is +required, those files can simply be \lstc{dlopen}'d. + +\todo{More details here? Is it necessary or just too technical?} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{First results} -\todo{} + +Without any particular care to efficiency or compactness, it is already +possible to produce a compiled version very close to the one described in +Section~\ref{sec:semantics}. Although the unwinding speed cannot yet be +actually benchmarked, it is already possible to write in a few hundred lines +of C a simple stack walker printing the functions traversed. It already works +without any problem on the easily tested cases, since corner cases are mostly +found in standard and highly optimized libraries, and it is not that easy to get +the program to stop and print a stack trace from within a system library +without using a debugger. + +The major drawback of this approach, without any particular care taken, is the +space waste. 
+ +\begin{table}[h] + \begin{tabular}{r r r r r r} + \toprule + \thead{Shared object} & \thead{Original \\ program size} + & \thead{Original \\ \lstc{.eh\_frame}} + & \thead{Generated \\ \ehelf{} \lstc{.text}} + & \thead{\% of original \\ program size} + & \thead{Growth \\ factor} \\ + \midrule + libc-2.27.so & 1.4 MiB & 130.1 KiB & 914.9 KiB & 63.92 & 7.03 \\ + libpthread-2.27.so & 58.1 KiB & 11.6 KiB & 70.5 KiB & 121.48 & 6.09 \\ + ld-2.27.so & 129.6 KiB & 9.6 KiB & 71.7 KiB & 55.34 & 7.44 \\ + hackbench & 2.9 KiB & 568.0 B & 2.1 KiB & 74.78 & 3.97 \\ + Total & 1.6 MiB & 151.8 KiB & 1.0 MiB & 65.32 & 6.98 \\ + \bottomrule + \end{tabular} + + \caption{Basic \ehelfs{} space usage}\label{table:basic_eh_elf_space} +\end{table} + +The space taken by those tentative \ehelfs{} is analyzed in +Table~\ref{table:basic_eh_elf_space} for \prog{hackbench}, a small program +introduced later in Section~\qtodo{Add a reference}, and the libraries on which +it depends. + +The first column only includes the sizes of the ELF sections \lstc{.text} (the +program itself) and \lstc{.rodata}, the read-only data (such as static strings, +etc.). Only the weight of the \lstc{.text} section of the generated \ehelfs{} +is considered, because it is self-consistent (few data or none is stored in +\lstc{.rodata}), and the other sections could be removed if the \ehelfs{} +\lstc{.text} was somehow embedded in the original shared object. + +This first tentative version of \ehelfs{} is roughly 7 times heavier than the +original \lstc{.eh_frame}, and represents a far too significant proportion of +the original program size. 
"""Convert a space-aligned text table read from stdin into a LaTeX tabular.

The first non-blank input line is the header row; every following non-blank
line is a data row.  Columns are separated by runs of two or more spaces, so
a cell may itself contain single spaces (``1.4 MiB``, ``Orig prog size``).

Usage: ``python to_latex.py < table.cmpsz > table.tex``
"""
import re
import sys

# Two or more consecutive spaces delimit columns; a lone space stays inside
# the cell it belongs to.
_COLUMN_SEPARATOR = re.compile(r' {2,}')

# Characters that LaTeX treats specially in text mode.  An unescaped ``%``
# silently comments out the remainder of the generated row, and a bare ``_``
# is an error outside math mode, so plain-text cells must be escaped.
_LATEX_SPECIALS = str.maketrans({
    '&': r'\&',
    '%': r'\%',
    '$': r'\$',
    '#': r'\#',
    '_': r'\_',
    '{': r'\{',
    '}': r'\}',
    '~': r'\textasciitilde{}',
    '^': r'\textasciicircum{}',
    '\\': r'\textbackslash{}',
})


def split_line(line):
    """Split one line of the text table into its stripped, non-empty cells.

    Returns an empty list for a blank (whitespace-only) line.
    """
    pieces = _COLUMN_SEPARATOR.split(line.strip())
    return [cell for cell in (piece.strip() for piece in pieces) if cell]


def latexify_row(row):
    r"""Join already-formatted cells with `` & `` and terminate with `` \\``.

    The cells are emitted verbatim: callers are responsible for escaping,
    which lets them pass cells that already contain LaTeX markup.
    """
    return ' & '.join(row) + r' \\'


def _escape_latex(text):
    """Return *text* with LaTeX special characters escaped for text mode."""
    return text.translate(_LATEX_SPECIALS)


def render_table(lines):
    r"""Render an iterable of text-table lines as the lines of a tabular.

    Blank lines are ignored.  The header cells are wrapped in ``\leftcell{}``
    and all cells are LaTeX-escaped.  The column specification has one
    right-aligned column per header cell.  Returns an empty list when
    *lines* holds no table rows.
    """
    table = [cells for cells in map(split_line, lines) if cells]
    if not table:
        return []

    header, body = table[0], table[1:]
    rendered = ['\\begin{tabular}{' + ('r ' * len(header)) + '}']
    rendered.append('\t' + latexify_row(
        [r'\leftcell{' + _escape_latex(cell) + r'}' for cell in header]))
    for row in body:
        rendered.append(
            '\t' + latexify_row([_escape_latex(cell) for cell in row]))
    rendered.append(r'\end{tabular}')
    return rendered


def main():
    """Read the text table from stdin and print the LaTeX tabular to stdout."""
    rendered = render_table(sys.stdin)
    if not rendered:
        # Fail with a clear message rather than an IndexError on empty input.
        sys.exit('to_latex: no table rows found on stdin')
    print('\n'.join(rendered))


if __name__ == '__main__':
    main()