diff --git a/report/data/fde_line_count.csv b/report/data/fde_line_count.csv new file mode 100644 index 0000000..1076134 --- /dev/null +++ b/report/data/fde_line_count.csv @@ -0,0 +1,51 @@ +lines,proportion +1,0.23499301373163092 +2,0.017110093953264273 +3,0.07736352686099734 +4,0.056643700313177545 +5,0.03385015658877379 +6,0.03386942905324018 +7,0.04987713803902674 +8,0.0671876656227415 +9,0.008098289568778607 +10,0.033187183811129846 +11,0.04311443025776921 +12,0.07292700554083353 +13,0.005184292941459889 +14,0.0267906528547338 +15,0.017582269332690918 +16,0.11287304264032763 +17,0.01497277764394122 +18,0.0167882437966755 +19,0.00454444712117562 +20,0.013413635268609973 +21,0.00499927728258251 +22,0.001603469043603951 +23,0.007791857383762949 +24,0.01650879306191279 +25,0.002752107925801012 +26,0.00184822934232715 +27,0.001376053962900506 +28,0.0025921464707299446 +29,0.001821247892074199 +30,0.0021218983377499397 +31,0.0018925560105998555 +32,0.003746567092266924 +33,0.0007882437966754999 +34,0.0008653336545410745 +35,0.0006649000240905806 +36,0.00109082148879788 +37,0.0005839556733317273 +38,0.0005068658154661527 +39,0.0006109371235846784 +40,0.0015591423753312456 +41,0.000439412189833775 +42,0.0004914478438930378 +43,0.0003257046494820525 +44,0.0005916646591182848 +45,0.00028330522765598654 +46,0.0004586846543001686 +47,0.00034112262105516744 +48,0.00045290291496025056 +49,0.00024861479161647796 +50,0.00027174174897615035 diff --git a/report/fiche_synthese.tex b/report/fiche_synthese.tex index b946eca..c97e51e 100644 --- a/report/fiche_synthese.tex +++ b/report/fiche_synthese.tex @@ -135,10 +135,9 @@ In most cases of everyday's life, a slow stack unwinding is not a problem, or even an annoyance. Yet, having a 25 times speed-up on stack unwinding-heavy tasks, such as profiling, can be really useful to profile heavy programs, particularly if one wants to profile many times in order to analyze the impact -of multiple changes. 
It can also be useful for exception-heavy -programs~\qtodo{cite Stephen's software?}. Thus, it might be interesting to -implement a more stable version, and try to interface it cleanly with -mainstream tools, such as \prog{perf}. +of multiple changes. It can also be useful for exception-heavy programs. Thus, +it might be interesting to implement a more stable version, and try to +interface it cleanly with mainstream tools, such as \prog{perf}. Another question worth exploring might be whether it is possible to shrink even more the original DWARF unwinding data, which would be stored in a format not diff --git a/report/report.tex b/report/report.tex index 2f4624a..f996079 100644 --- a/report/report.tex +++ b/report/report.tex @@ -18,6 +18,7 @@ Under supervision of Francesco Zappa-Nardelli\\ \usepackage{makecell} \usepackage{booktabs} \usepackage{wrapfig} +\usepackage{pgfplots} %\usepackage[backend=biber,style=alphabetic]{biblatex} \usepackage[backend=biber]{biblatex} @@ -194,6 +195,12 @@ particularly if a lot of exceptions are thrown and caught far away in their call path. In the former, profiling \emph{is} performance-heavy and often quite slow when analyzing large programs anyway. +One of the motivations behind this internship was also Stephen Kell's +\prog{libcrunch}~\cite{kell2016libcrunch}, which makes heavy use of stack +unwinding through \prog{libunwind} and had to force \prog{gcc} to use a +frame pointer (\reg{rbp}) everywhere through \lstbash{-fno-omit-frame-pointer} +in order to mitigate the slowness. + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{DWARF format} @@ -291,7 +298,40 @@ unwinding process. 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{How big are FDEs?} -\todo{} + +\begin{figure}[h] + \centering + \begin{tikzpicture} + \begin{axis}[ + width=0.9\linewidth, height=4cm, + grid=major, + grid style={dashed,gray!30}, + xlabel=FDE row count, + ylabel=Proportion, + %legend style={at={(0.5,-0.2)},anchor=north}, + xtick distance=5, + ybar, %added here + ] + \addplot[blue,fill] table[x=lines,y=proportion, col sep=comma] + {data/fde_line_count.csv}; + + \end{axis} + \end{tikzpicture} + \caption{FDE row count density}\label{fig:fde_line_density} +\end{figure} + +Since evaluating an \lstc{.eh_frame} FDE entry is, as seen in the previous +section, roughly linear in time in its number of rows, we must wonder what the +distribution of FDE row counts is. The histogram in +Figure~\ref{fig:fde_line_density} was generated on a random sample of around +2000 ELF files present on an Arch Linux system. + +Most of the FDEs seem to be quite small, which only reflects that most +functions found in the wild are relatively small and do not particularly +allocate many times on the stack. Yet, the median value is at $8$ rows per FDE, +and the average is at $9.7$, which is already not that fast to unwind. Values +up to $50$ are not that uncommon, given some commonly used functions have such +large FDEs, and often end up in the call stack. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{Unwinding state-of-the-art} @@ -312,7 +352,7 @@ the relevant FDE from its start, until it finds the row it was seeking. \section{DWARF semantics}\label{sec:semantics} We will now define semantics covering most of the operations used for -CFI\footnote{To be defined elsewhere in the report} described in the DWARF +CFI\todo{To be defined elsewhere in the report} described in the DWARF standard~\cite{dwarf5std}, with the exception of DWARF expressions. 
These are not exhaustively treated because they are quite rich and would take a lot of time and space to formalize, and in the meantime are only seldom used (see the @@ -375,7 +415,7 @@ operand~--- are irrelevant and will be eluded. row. This is \emph{not implemented in this semantics} for simplicity and brevity (we would have to introduce CIE (preamble) and FDE (body) independently). This is also not much used in actual ELF - files\footnote{TODO: refer to stats}. + files\todo{refer to stats}. \item{} \dwcfa{remember\_state()}~: push the state of all the registers of this row on an implicit stack \item{} \dwcfa{restore\_state()}~: @@ -715,7 +755,7 @@ the original program size. \todo{more in-depth analysis?} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Space optimization} +\subsection{Space optimization}\label{ssec:space_optim} A lot of small space optimizations, such as filtering out empty FDEs, merging together the rows that are equivalent on all the registers kept, etc.\ were @@ -923,7 +963,46 @@ seconds (using only a single core). %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{Measured compactness}\label{ssec:results_size} -\todo{} + +A first measure of compactness was given earlier in this report, for one of the +earliest working versions, in Table~\ref{table:basic_eh_elf_space}. + +The same data, generated for the latest version of \ehelfs, can be seen in +Table~\ref{table:bench_space}. + +The effect of the outlining mentioned in Section~\ref{ssec:space_optim} is +particularly visible in this table: \prog{hackbench} grows significantly more +than the other shared objects. This is because \prog{hackbench} has a +much smaller \lstc{.eh_frame}: the outlined data is thus reused only a few +times, compared to \eg{} \prog{libc}, in which the outlined data is reused a +lot. 
+ +\begin{table}[h] + \centering + \begin{tabular}{r r r r r r} + \toprule + \thead{Shared object} & \thead{Original \\ program size} + & \thead{Original \\ \lstc{.eh\_frame}} + & \thead{Generated \\ \ehelf{} \lstc{.text}} + & \thead{\% of original \\ program size} + & \thead{Growth \\ factor} \\ + \midrule + libc-2.27.so + & 1.4 MiB & 130.1 KiB & 313.2 KiB & 21.88 & 2.41 \\ + libpthread-2.27.so + & 58.1 KiB & 11.6 KiB & 25.4 KiB & 43.71 & 2.19 \\ + ld-2.27.so + & 129.6 KiB & 9.6 KiB & 28.6 KiB & 22.09 & 2.97 \\ + hackbench + & 2.9 KiB & 568.0 B & 2.8 KiB & 93.87 & 4.99 \\ + Total + & 1.6 MiB & 151.8 KiB & 370.0 KiB & 22.81 & 2.44 \\ + \bottomrule + \end{tabular} + + \caption{\ehelfs{} space usage}\label{table:bench_space} +\end{table} + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{Instructions coverage} @@ -937,6 +1016,7 @@ seconds (using only a single core). \printbibliography{} %% License notice %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\vfill \hfill \begin{minipage}{0.7\textwidth} \begin{flushright} \itshape{} \small{} diff --git a/shared/report.bib b/shared/report.bib index 986d4f8..59e0431 100644 --- a/shared/report.bib +++ b/shared/report.bib @@ -76,3 +76,14 @@ bibsource = {dblp computer science bibliography, https://dblp.org} } +@article{kell2016libcrunch, + title={Dynamically diagnosing type errors in unsafe code}, + author={Kell, Stephen}, + journal={ACM SIGPLAN Notices}, + volume={51}, + number={10}, + pages={800--819}, + year={2016}, + publisher={ACM} +} +