Compare commits
1 commit
master
...
review-gui
Author | SHA1 | Date | |
---|---|---|---|
06941f6e33 |
26 changed files with 162 additions and 758 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -137,4 +137,3 @@ sympy-plots-for-*.tex/
|
||||||
# WinEdt
|
# WinEdt
|
||||||
*.bak
|
*.bak
|
||||||
*.sav
|
*.sav
|
||||||
*.xdv
|
|
||||||
|
|
8
Makefile
8
Makefile
|
@ -1,12 +1,6 @@
|
||||||
all: report slides
|
all: report
|
||||||
|
|
||||||
.PHONY: report
|
.PHONY: report
|
||||||
report:
|
report:
|
||||||
$(MAKE) -C report
|
$(MAKE) -C report
|
||||||
ln -sf report/report.pdf .
|
ln -sf report/report.pdf .
|
||||||
|
|
||||||
.PHONY: slides
|
|
||||||
slides:
|
|
||||||
$(MAKE) -C slides
|
|
||||||
ln -sf slides/slides.pdf .
|
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
all: report.pdf
|
all: report.pdf
|
||||||
|
|
||||||
report.pdf: report.tex fiche_synthese.tex ../shared/report.bib
|
report.pdf: report.tex fiche_synthese.tex ../shared/report.bib
|
||||||
latexmk -xelatex -pdf $<
|
latexmk -pdf $<
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -f *aux *bbl *bcf *blg *_latexmk *fls *log *out *.run.xml
|
rm -f *aux *bbl *bcf *blg *_latexmk *fls *log *out *.run.xml
|
||||||
|
|
|
@ -8,13 +8,12 @@
|
||||||
|
|
||||||
\subsection*{The general context}
|
\subsection*{The general context}
|
||||||
|
|
||||||
The standard debugging data format, DWARF (Debugging With Attributed Record
|
The standard debugging data format, DWARF, contains tables that, for a given
|
||||||
Formats), contains tables permitting, for a given instruction pointer (IP), to
|
instruction pointer (IP), make it possible to understand how the assembly instruction
|
||||||
understand how instructions from the assembly code relate to the original
|
relates to the source code, where variables are currently allocated in memory
|
||||||
source code, where variables are currently allocated in memory or if they are
|
or if they are stored in a register, what their types are and how to unwind the
|
||||||
stored in a register, what their types are and how to unwind the current stack
|
current stack frame. This information is generated when passing \eg{} the
|
||||||
frame. This information is generated when passing \eg{} the switch \lstbash{-g}
|
switch \lstbash{-g} to \prog{gcc} or equivalents.
|
||||||
to \prog{gcc} or equivalents.
|
|
||||||
|
|
||||||
Even in stripped (non-debug) binaries, a small portion of DWARF data remains:
|
Even in stripped (non-debug) binaries, a small portion of DWARF data remains:
|
||||||
the stack unwinding data. This information is necessary to unwind stack
|
the stack unwinding data. This information is necessary to unwind stack
|
||||||
|
@ -29,7 +28,7 @@ Section~\ref{ssec:instr_cov}~\textendash, consisting in offsets from memory
|
||||||
addresses stored in registers (such as \reg{rbp} or \reg{rsp}). Yet, the
|
addresses stored in registers (such as \reg{rbp} or \reg{rsp}). Yet, the
|
||||||
standard defines rules that take the form of a stack-machine expression that
|
standard defines rules that take the form of a stack-machine expression that
|
||||||
can access virtually all the process's memory and perform Turing-complete
|
can access virtually all the process's memory and perform Turing-complete
|
||||||
computations~\cite{oakley2011exploiting}.
|
computation~\cite{oakley2011exploiting}.
|
||||||
|
|
||||||
\subsection*{The research problem}
|
\subsection*{The research problem}
|
||||||
|
|
||||||
|
@ -74,8 +73,8 @@ of compiled DWARF into existing projects have been made easy by implementing an
|
||||||
alternative version of the \textit{de facto} standard library for this purpose,
|
alternative version of the \textit{de facto} standard library for this purpose,
|
||||||
\prog{libunwind}.
|
\prog{libunwind}.
|
||||||
|
|
||||||
We explored and evaluated multiple approaches to determine which compilation
|
Multiple approaches have been tried and evaluated to determine which
|
||||||
process leads to the best time/space trade-off.
|
compilation process leads to the best time/space trade-off.
|
||||||
|
|
||||||
Unexpectedly, the hardest part of the project proved to be finding and
|
Unexpectedly, the hardest part of the project proved to be finding and
|
||||||
implementing a benchmarking protocol that was both relevant and reliable.
|
implementing a benchmarking protocol that was both relevant and reliable.
|
||||||
|
@ -84,8 +83,8 @@ few samples (around $10\,\mu s$ per frame) to avoid statistical errors. Having
|
||||||
enough samples for this purpose --~at least a few thousands~-- is not easy,
|
enough samples for this purpose --~at least a few thousands~-- is not easy,
|
||||||
since one must avoid unwinding the same frame over and over again, which would
|
since one must avoid unwinding the same frame over and over again, which would
|
||||||
only benchmark the caching mechanism. The other problem is to distribute
|
only benchmark the caching mechanism. The other problem is to distribute
|
||||||
evenly the unwinding measures across the various IPs, among which those
|
evenly the unwinding measures across the various IPs, including directly into
|
||||||
directly located into the loaded libraries (\eg{} the \prog{libc}).
|
the loaded libraries (\eg{} the \prog{libc}).
|
||||||
The solution eventually chosen was to modify \prog{perf}, the standard
|
The solution eventually chosen was to modify \prog{perf}, the standard
|
||||||
profiling program for Linux, in order to gather statistics and benchmarks of
|
profiling program for Linux, in order to gather statistics and benchmarks of
|
||||||
its unwindings. Modifying \prog{perf} was an additional challenge that turned
|
its unwindings. Modifying \prog{perf} was an additional challenge that turned
|
||||||
|
@ -129,10 +128,10 @@ the reference implementation. Indeed, corner cases occur often, and on a 27000
|
||||||
samples test, 885 failures were observed for \prog{libunwind}, against 1099 for
|
samples test, 885 failures were observed for \prog{libunwind}, against 1099 for
|
||||||
the compiled DWARF version (see Section~\ref{ssec:timeperf}).
|
the compiled DWARF version (see Section~\ref{ssec:timeperf}).
|
||||||
|
|
||||||
The implementation, however, is not yet production-ready: it only supports the
|
The implementation, however, is not production-ready: it only supports the
|
||||||
x86\_64 architecture, and relies to some extent on the Linux operating system.
|
x86\_64 architecture, and relies to some extent on the Linux operating system.
|
||||||
None of these pose a fundamental problem. Supporting other processor
|
None of those are real problems in practice. Supporting other processor
|
||||||
architectures and ABIs is only a matter of engineering. The operating system
|
architectures and ABIs is only a matter of engineering. The operating system
|
||||||
dependency is only present in the libraries developed in order to interact with
|
dependency is only present in the libraries developed in order to interact with
|
||||||
the compiled unwinding data, which can be developed for virtually any operating
|
the compiled unwinding data, which can be developed for virtually any operating
|
||||||
system.
|
system.
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||
\author{Th\'eophile Bastian\\
|
\author{Th\'eophile Bastian\\
|
||||||
Under supervision of Francesco Zappa Nardelli, March -- August 2018\\
|
Under supervision of Francesco Zappa Nardelli, March -- August 2018\\
|
||||||
{\textsc{parkas}, \textsc{inria}}}
|
{\textsc{parkas}, \'Ecole Normale Sup\'erieure de Paris}}
|
||||||
|
|
||||||
%\date{March -- August 2018\\August 20, 2018}
|
%\date{March -- August 2018\\August 20, 2018}
|
||||||
\date{\vspace{-2em}}
|
\date{\vspace{-2em}}
|
||||||
|
@ -108,7 +108,7 @@ the location of the return address. Then, the compiler might use \reg{rbp}
|
||||||
the function, and allows for easy addressing of local variables. To some
|
the function, and allows for easy addressing of local variables. To some
|
||||||
extent, it also allows for hot debugging, such as saving a useful core dump
|
extent, it also allows for hot debugging, such as saving a useful core dump
|
||||||
upon segfault. Yet, using \reg{rbp} to save \reg{rip} wastes a register, and
|
upon segfault. Yet, using \reg{rbp} to save \reg{rip} wastes a register, and
|
||||||
the decision of using it, on x86\_64 System V, is up to the compiler.
|
the decision of using it is, on x86\_64 System V, up to the compiler.
|
||||||
|
|
||||||
Usually, a function starts by subtracting some value from \reg{rsp}, allocating
|
Usually, a function starts by subtracting some value from \reg{rsp}, allocating
|
||||||
some space in the stack frame for its local variables. Then, it saves on the
|
some space in the stack frame for its local variables. Then, it saves on the
|
||||||
|
@ -150,7 +150,7 @@ compiler is free to do as it wishes. Even worse, it is not trivial to know
|
||||||
callee-saved registers were at all, since if the function does not alter a
|
callee-saved registers were at all, since if the function does not alter a
|
||||||
register, it does not have to save it.
|
register, it does not have to save it.
|
||||||
|
|
||||||
With this example, it seems pretty clear that some additional data is necessary
|
With this example, it seems pretty clear that some additional data is necessary
|
||||||
to perform stack unwinding reliably, without resorting to mere guesswork. This
|
to perform stack unwinding reliably, without resorting to mere guesswork. This
|
||||||
data is stored along with the debugging information of a program, and one
|
data is stored along with the debugging information of a program, and one
|
||||||
common format of debugging data is DWARF\@.
|
common format of debugging data is DWARF\@.
|
||||||
|
@ -218,23 +218,22 @@ that is, $300\,\text{ms}$ per second of program run with default settings.
|
||||||
|
|
||||||
One of the causes that inspired this internship was also Stephen Kell's
|
One of the causes that inspired this internship was also Stephen Kell's
|
||||||
\prog{libcrunch}~\cite{kell2016libcrunch}, which makes a heavy use of stack
|
\prog{libcrunch}~\cite{kell2016libcrunch}, which makes a heavy use of stack
|
||||||
unwinding through \prog{libunwind} and had to force \prog{gcc} to use a frame
|
unwinding through \prog{libunwind} and had to force \prog{gcc} to use a
|
||||||
pointer (\reg{rbp}) everywhere through \lstbash{-fno-omit-frame-pointer} in
|
frame pointer (\reg{rbp}) everywhere through \lstbash{-fno-omit-frame-pointer}
|
||||||
order to mitigate the slowness.
|
in order to mitigate the slowness.
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
\subsection{DWARF format}
|
\subsection{DWARF format}
|
||||||
|
|
||||||
The DWARF format was first standardized as the format for debugging information
|
The DWARF format was first standardized as the format for debugging information
|
||||||
of the ELF executable binaries (Executable and Linkable Format), which are standard
|
of the ELF executable binaries, which are standard on UNIX-like systems,
|
||||||
on UNIX-like systems, including Linux and MacOS --~but not Windows. It is now
|
including Linux and MacOS --~but not Windows. It is now commonly used across a
|
||||||
commonly used across a wide variety of binary formats to store debugging
|
wide variety of binary formats to store debugging information. As of now, the
|
||||||
information. As of now, the latest DWARF standard is DWARF 5~\cite{dwarf5std},
|
latest DWARF standard is DWARF 5~\cite{dwarf5std}, which is openly accessible.
|
||||||
which is openly accessible.
|
|
||||||
|
|
||||||
The DWARF data commonly includes type information about the variables in the
|
The DWARF data commonly includes type information about the variables in the
|
||||||
original programming language, correspondence of assembly instructions with a
|
original programming language, correspondence of assembly instructions with a
|
||||||
line in the original source file, \ldots{}
|
line in the original source file, \ldots
|
||||||
The format also specifies a way to represent unwinding data, as described in
|
The format also specifies a way to represent unwinding data, as described in
|
||||||
Section~\ref{ssec:stack_unwinding} above, in an ELF section originally called
|
Section~\ref{ssec:stack_unwinding} above, in an ELF section originally called
|
||||||
\lstc{.debug_frame}, but most often found as \ehframe.
|
\lstc{.debug_frame}, but most often found as \ehframe.
|
||||||
|
@ -398,21 +397,27 @@ parse the relevant FDE from its start, until it finds the row it was seeking.
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
\section{DWARF semantics}\label{sec:semantics}
|
\section{DWARF semantics}\label{sec:semantics}
|
||||||
|
|
||||||
The DWARF 5 standard~\cite{dwarf5std} is written in English prose, and our
|
We now define semantics covering the operations used for FDEs described in the
|
||||||
first task is to formalize it. Thus, in this section, we first recall the
|
DWARF standard~\cite{dwarf5std}, such as seen in Listing~\ref{lst:ex1_dwraw},
|
||||||
informal behaviour of DWARF instructions as provided by the standard; and then
|
with the exception of DWARF expressions. These are not treated here, because
|
||||||
we formalize their semantics by mapping them to well-defined C code. We omit
|
they form a rich language and would take a lot of time and space to formalize,
|
||||||
the translation of DWARF expressions, because they form a rich language and
|
while at the same time being seldom used --~see Section~\ref{ssec:instr_cov}.
|
||||||
would take a lot of time and space to formalize, while at the same time being
|
|
||||||
seldom used --~see Section~\ref{ssec:instr_cov}.
|
|
||||||
|
|
||||||
These semantics are defined \wrt{} the well-formalized C language, and
|
These semantics are defined \wrt{} the well-formalized C language, and
|
||||||
are passing through an intermediate language. The DWARF language can read the
|
are passing through an intermediary language. The DWARF language can read the
|
||||||
whole memory, as well as registers, and is always executed for some instruction
|
whole memory, as well as registers, and is always executed for some instruction
|
||||||
pointer. The C function representing it thus takes as parameters an array
|
pointer. The C function representing it thus takes as parameters an array
|
||||||
of the registers' values as well as an IP, and returns another array of
|
of the registers' values as well as an IP, and returns another array of
|
||||||
register values, which represents the evaluated DWARF row.
|
register values, which represents the evaluated DWARF row.
|
||||||
|
|
||||||
|
\subsection{Concerning correctness}\label{ssec:sem_correctness}
|
||||||
|
|
||||||
|
The semantics described in this section are designed in a concern of
|
||||||
|
\emph{formalization} of the original standard. This standard, sadly, only
|
||||||
|
describes in plain English each instruction's action and result. This basis
|
||||||
|
cannot be used to \emph{prove} anything correct without relying on informal
|
||||||
|
interpretations.
|
||||||
|
|
||||||
\subsection{Original language: DWARF instructions}
|
\subsection{Original language: DWARF instructions}
|
||||||
|
|
||||||
These are the DWARF instructions used for CFI description, that is, the
|
These are the DWARF instructions used for CFI description, that is, the
|
||||||
|
@ -481,7 +486,7 @@ a language.
|
||||||
|
|
||||||
\subsection{Intermediary language $\intermedlang$}
|
\subsection{Intermediary language $\intermedlang$}
|
||||||
|
|
||||||
A first pass translates DWARF instructions into this intermediate language
|
A first pass translates DWARF instructions into this intermediary language
|
||||||
$\intermedlang$. It is designed to be more mathematical, representing the same
|
$\intermedlang$. It is designed to be more mathematical, representing the same
|
||||||
thing, but abstracting all the data compression of the DWARF format away, so
|
thing, but abstracting all the data compression of the DWARF format away, so
|
||||||
that we can better reason on it and transform it into C code.
|
that we can better reason on it and transform it into C code.
|
||||||
|
@ -498,7 +503,7 @@ Its grammar is as follows:
|
||||||
\values &::= \bot & \text{Values: undefined,}\\
|
\values &::= \bot & \text{Values: undefined,}\\
|
||||||
&\quad\vert~\valaddr{\spexpr} & \text{at address $x$},\\
|
&\quad\vert~\valaddr{\spexpr} & \text{at address $x$},\\
|
||||||
&\quad\vert~\valval{\spexpr} & \text{of value $x$} \\
|
&\quad\vert~\valval{\spexpr} & \text{of value $x$} \\
|
||||||
&\quad\vert~\valexpr{} & \text{of expression $x$, see in text} \\
|
&\quad\vert~\valexpr{??} & \text{of expression $x$, see in text} \\
|
||||||
\spexpr &::= \regs \times \mathbb{Z}
|
\spexpr &::= \regs \times \mathbb{Z}
|
||||||
& \text{A ``simple'' expression $\reg{reg} + \textit{offset}$} \\
|
& \text{A ``simple'' expression $\reg{reg} + \textit{offset}$} \\
|
||||||
\end{align*}
|
\end{align*}
|
||||||
|
@ -609,7 +614,7 @@ $f$. If we consider the following fictive row $R_0$,
|
||||||
\end{array}\right.
|
\end{array}\right.
|
||||||
\]
|
\]
|
||||||
|
|
||||||
\noindent{}then, we would have
|
then, we would have
|
||||||
|
|
||||||
\[
|
\[
|
||||||
R \insarrow{\reg{rbx}} \left(\valaddr{\reg{rip - 24}}\right)
|
R \insarrow{\reg{rbx}} \left(\valaddr{\reg{rip - 24}}\right)
|
||||||
|
@ -696,7 +701,7 @@ if(ip >= $loc$) {
|
||||||
} \end{lstlisting}
|
} \end{lstlisting}
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
|
|
||||||
\noindent{}\noindent{}while $\semR{\bullet}$ is defined as
|
while $\semR{\bullet}$ is defined as
|
||||||
\begin{align*}
|
\begin{align*}
|
||||||
\semR{\bot} &\eqspace{}
|
\semR{\bot} &\eqspace{}
|
||||||
\text{\lstc{ERROR_VALUE}} \\
|
\text{\lstc{ERROR_VALUE}} \\
|
||||||
|
@ -706,16 +711,6 @@ if(ip >= $loc$) {
|
||||||
\text{\lstc{(old_ctx[reg] + offset)}} \\
|
\text{\lstc{(old_ctx[reg] + offset)}} \\
|
||||||
\end{align*}
|
\end{align*}
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\subsection{Concerning correctness}\label{ssec:sem_correctness}
|
|
||||||
|
|
||||||
The semantics described in this section are designed in a concern of
|
|
||||||
\emph{formalization} of the original standard. This standard, sadly, only
|
|
||||||
describes in plain English each instruction's action and result. This basis
|
|
||||||
cannot be used to \emph{prove} anything correct without relying on informal
|
|
||||||
interpretations.
|
|
||||||
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
\section{Stack unwinding data compilation}
|
\section{Stack unwinding data compilation}
|
||||||
|
@ -726,12 +721,12 @@ actual C implementation.
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
\subsection{Code availability}\label{ssec:code_avail}
|
\subsection{Code availability}\label{ssec:code_avail}
|
||||||
|
|
||||||
All the code produced during the course of this internship is available on the
|
All the code produced during this internship is available on the various
|
||||||
various repositories from \url{https://git.tobast.fr/m2-internship/}. The
|
repositories from \url{https://git.tobast.fr/m2-internship/}. The repositories
|
||||||
repositories contain \texttt{README} files describing them; a summary and
|
contain \texttt{README} files describing them; a summary and global description
|
||||||
global description can be found in the \texttt{abstract} repository. This
|
can be found in the \texttt{abstract} repository. This should be detailed
|
||||||
should be detailed enough to run the project. The source code is entirely under
|
enough to run the project. The source code is entirely under free software
|
||||||
free software licenses.
|
licenses.
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
\subsection{Compilation: \ehelfs}\label{ssec:ehelfs}
|
\subsection{Compilation: \ehelfs}\label{ssec:ehelfs}
|
||||||
|
@ -777,12 +772,8 @@ would do after a \lstbash{frame n} command. Yet, if one was to enhance the
|
||||||
code to handle every register, it would not be much harder and would probably
|
code to handle every register, it would not be much harder and would probably
|
||||||
be only a few hours' worth of code refactoring and rewriting.
|
be only a few hours' worth of code refactoring and rewriting.
|
||||||
|
|
||||||
\begin{figure}[h]
|
\lstinputlisting[language=C, caption={Unwinding context}, label={lst:unw_ctx}]
|
||||||
\centering{}
|
{src/dwarf_assembly_context/unwind_context.c}
|
||||||
\lstinputlisting[language=C, caption={Unwinding context},
|
|
||||||
label={lst:unw_ctx}]
|
|
||||||
{src/dwarf_assembly_context/unwind_context.c}
|
|
||||||
\end{figure}
|
|
||||||
|
|
||||||
In the unwind context from Listing~\ref{lst:unw_ctx}, the values of type
|
In the unwind context from Listing~\ref{lst:unw_ctx}, the values of type
|
||||||
\lstc{uintptr_t} are the values of the corresponding registers, and
|
\lstc{uintptr_t} are the values of the corresponding registers, and
|
||||||
|
@ -813,11 +804,10 @@ scattered among various \ehelf{} files, one for each shared object loaded
|
||||||
unwinder must first acquire a \emph{memory map}, a table listing the various
|
unwinder must first acquire a \emph{memory map}, a table listing the various
|
||||||
ELF files loaded and \emph{mapped} in memory, and on which memory segment. This
|
ELF files loaded and \emph{mapped} in memory, and on which memory segment. This
|
||||||
memory map is provided by the operating system --~for instance, on Linux, it is
|
memory map is provided by the operating system --~for instance, on Linux, it is
|
||||||
available as a file in \texttt{/proc}, a special part of the file system that
|
available as a file in \texttt{/proc}. Once this map is acquired, when
|
||||||
the kernel uses to communicate with the userland processes. Once this map is
|
unwinding from a given IP, the unwinder must identify the memory segment from
|
||||||
acquired, when unwinding from a given IP, the unwinder must identify the memory
|
which it comes, deduce the source ELF file, and deduce the corresponding
|
||||||
segment from which it comes, deduce the source ELF file, and deduce the
|
\ehelf.
|
||||||
corresponding \ehelf.
|
|
||||||
|
|
||||||
\medskip
|
\medskip
|
||||||
|
|
||||||
|
@ -840,7 +830,7 @@ well on the standard cases that are easily tested, and can be used to unwind
|
||||||
the stack of simple programs.
|
the stack of simple programs.
|
||||||
|
|
||||||
The major drawback of this approach, without any particular care taken, is the
|
The major drawback of this approach, without any particular care taken, is the
|
||||||
waste of space. The space taken by those tentative \ehelfs{} is analyzed in
|
space waste. The space taken by those tentative \ehelfs{} is analyzed in
|
||||||
Table~\ref{table:basic_eh_elf_space} for \prog{hackbench}, a small program
|
Table~\ref{table:basic_eh_elf_space} for \prog{hackbench}, a small program
|
||||||
introduced later in Section~\ref{ssec:bench_perf}, and the libraries on which
|
introduced later in Section~\ref{ssec:bench_perf}, and the libraries on which
|
||||||
it depends.
|
it depends.
|
||||||
|
@ -883,21 +873,21 @@ the original program size ($65\,\%$).
|
||||||
|
|
||||||
A lot of small space optimizations, such as filtering out empty FDEs, merging
|
A lot of small space optimizations, such as filtering out empty FDEs, merging
|
||||||
together the rows that are equivalent on all the registers kept, etc.\ were
|
together the rows that are equivalent on all the registers kept, etc.\ were
|
||||||
made in order to shrink the size of the \ehelfs.
|
made in order to shrink the \ehelfs.
|
||||||
|
|
||||||
\medskip
|
\medskip
|
||||||
|
|
||||||
The optimization that most reduced the output size was to use an if/else tree
|
The optimization that most reduced the output size was to use an if/else
|
||||||
implementing a binary search on the instruction pointer relevant intervals,
|
tree implementing a binary search on the instruction pointer relevant
|
||||||
instead of a single monolithic switch. In the process, we also \emph{outline}
|
intervals, instead of a single monolithic switch. In the process, we also
|
||||||
code whenever possible, that is, find out identical ``switch cases'' bodies
|
\emph{outline} code whenever possible, that is, find out identical ``switch
|
||||||
--~which are not switch cases anymore, but \texttt{if} bodies~--, move them
|
cases'' bodies --~which are not switch cases anymore, but \texttt{if}
|
||||||
outside of the if/else tree, identify them by a label, and jump to them using a
|
bodies~--, move them outside of the if/else tree, identify them by a label, and
|
||||||
\lstc{goto}, which de-duplicates a lot of code and contributes greatly to the
|
jump to them using a \lstc{goto}, which de-duplicates a lot of code and
|
||||||
shrinking. In the process, we noticed that the vast majority of FDE rows are
|
contributes greatly to the shrinking. In the process, we noticed that the vast
|
||||||
actually taken among very few ``common'' FDE rows. For instance, in the
|
majority of FDE rows are actually taken among very few ``common'' FDE rows. For
|
||||||
\prog{libc}, out of a total of $20827$ rows, only $302$ ($1.5\,\%$) unique rows
|
instance, in the \prog{libc}, out of a total of $20827$ rows, only $302$
|
||||||
remain after the outlining.
|
($1.5\,\%$) unique rows remain after the outlining.
|
||||||
|
|
||||||
This makes this optimization really efficient, as seen later in
|
This makes this optimization really efficient, as seen later in
|
||||||
Section~\ref{ssec:results_size}, but also makes it an interesting question
|
Section~\ref{ssec:results_size}, but also makes it an interesting question
|
||||||
|
@ -1005,8 +995,7 @@ The program that was chosen for \prog{perf}-benchmarking is
|
||||||
\prog{hackbench}~\cite{hackbenchsrc}. This small program is designed to
|
\prog{hackbench}~\cite{hackbenchsrc}. This small program is designed to
|
||||||
stress-test and benchmark the Linux scheduler by spawning processes or threads
|
stress-test and benchmark the Linux scheduler by spawning processes or threads
|
||||||
that communicate with each other. It has the interest of generating stack
|
that communicate with each other. It has the interest of generating stack
|
||||||
activity, being linked against \prog{libc} and \prog{pthread}, and being very
|
activity, being linked against \prog{libc} and \prog{pthread}, and being very light.
|
||||||
light.
|
|
||||||
|
|
||||||
\medskip
|
\medskip
|
||||||
|
|
||||||
|
@ -1066,8 +1055,7 @@ CSmith code is notoriously hard to understand and edit.
|
||||||
All the measures in this report were made on a computer with an Intel Xeon
|
All the measures in this report were made on a computer with an Intel Xeon
|
||||||
E3-1505M v6 CPU, with a clock frequency of $3.00$\,GHz and 8 cores. The
|
E3-1505M v6 CPU, with a clock frequency of $3.00$\,GHz and 8 cores. The
|
||||||
computer has 32\,GB of RAM, and care was taken never to fill it and start
|
computer has 32\,GB of RAM, and care was taken never to fill it and start
|
||||||
swapping --~using the hard drive to store data instead of the RAM when it is
|
swapping.
|
||||||
full, degrading harshly the performance.
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
\subsection{Measured time performance}\label{ssec:timeperf}
|
\subsection{Measured time performance}\label{ssec:timeperf}
|
||||||
|
@ -1132,8 +1120,7 @@ The compilation time of \ehelfs{} is also reasonable. On the machine
|
||||||
described in Section~\ref{ssec:bench_hw}, and without using multiple cores to
|
described in Section~\ref{ssec:bench_hw}, and without using multiple cores to
|
||||||
compile, the various shared objects needed to run \prog{hackbench} --~that is,
|
compile, the various shared objects needed to run \prog{hackbench} --~that is,
|
||||||
\prog{hackbench}, \prog{libc}, \prog{ld} and \prog{libpthread}~-- are compiled
|
\prog{hackbench}, \prog{libc}, \prog{ld} and \prog{libpthread}~-- are compiled
|
||||||
in an overall time of $25.28$ seconds, which a developer is probably prepared
|
in an overall time of $25.28$ seconds.
|
||||||
to wait for.
|
|
||||||
|
|
||||||
The unwinding errors observed are hard to investigate, but are most probably
|
The unwinding errors observed are hard to investigate, but are most probably
|
||||||
due to truncated stack records. Indeed, since \prog{perf} dumps the last $n$
|
due to truncated stack records. Indeed, since \prog{perf} dumps the last $n$
|
||||||
|
@ -1191,7 +1178,7 @@ registers represent most columns --~see Section~\ref{ssec:instr_cov}.
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
\subsection{Instructions coverage}\label{ssec:instr_cov}
|
\subsection{Instructions coverage}\label{ssec:instr_cov}
|
||||||
|
|
||||||
In order to determine which DWARF instructions should be implemented to
|
In order to determine which DWARF instructions are necessary to implement to
|
||||||
have meaningful results, as well as to assess the instruction coverage of our
|
have meaningful results, as well as to assess the instruction coverage of our
|
||||||
compiler and \ehelfs, we must look at real-world ELF files and inspect the
|
compiler and \ehelfs, we must look at real-world ELF files and inspect the
|
||||||
instructions used.
|
instructions used.
|
||||||
|
@ -1305,39 +1292,6 @@ It is also worth noting that among all of the 4000 analyzed files, all the
|
||||||
unsupported expressions are clustered in only 12 of them, and only 24 contained
|
unsupported expressions are clustered in only 12 of them, and only 24 contained
|
||||||
unsupported instructions at all.
|
unsupported instructions at all.
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\section*{Conclusion}
|
|
||||||
|
|
||||||
From this data, we can deduce that
|
|
||||||
|
|
||||||
\begin{itemize}[itemsep=3pt, parsep=0pt]
|
|
||||||
|
|
||||||
\item compilation of the DWARF unwinding data is effective to speed up
|
|
||||||
drastically unwinding procedures: speedup of $\times 25.9$;
|
|
||||||
|
|
||||||
\item code outlining is effective to reduce the produced binary size: from
|
|
||||||
$1\ \text{MiB}$ to $370\ \text{KiB}$, from a growth factor of $7$
|
|
||||||
compared to DWARF unwinding data to a growth factor of $2.45$;
|
|
||||||
|
|
||||||
\item unwinding relies on a small subset of DWARF instructions and
|
|
||||||
expressions, while most instructions are not used at all in DWARF code
|
|
||||||
produced by compilers.
|
|
||||||
|
|
||||||
\end{itemize}
|
|
||||||
|
|
||||||
The overall size of the project is
|
|
||||||
|
|
||||||
\begin{itemize}[itemsep=3pt, parsep=0pt]
|
|
||||||
\item compiler: 1628 lines,
|
|
||||||
\item \prog{libunwind}: 810 lines,
|
|
||||||
\item \prog{perf}: 222 lines
|
|
||||||
\end{itemize}
|
|
||||||
|
|
||||||
\noindent{} for a total of 2660 lines of code on the main project. The various
|
|
||||||
statistics, benchmarking, testing and analyzing code modules add up to around
|
|
||||||
1500 more lines.
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
%%%% End main text content %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%% End main text content %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
|
86
reviews.guinness
Normal file
86
reviews.guinness
Normal file
|
@ -0,0 +1,86 @@
|
||||||
|
# fiche_synthese.tex:
|
||||||
|
|
||||||
|
l11 : Toujours donner le vrai nom quand tu donnes un acronyme pour la première fois.
|
||||||
|
l12 : s/permit/permits non ?
|
||||||
|
l12 : the assembly instruction, explique un peu. Là on sait pas si tu parles de l'assemblée nationale ou bien d'une instruction assembleur :P
|
||||||
|
l13 : where are variables currently located
|
||||||
|
l14 : explique rapidement ce que veut dire unwind ?
|
||||||
|
l31 : s/computation/computations non ?
|
||||||
|
|
||||||
|
l76 : have been tested il me semble.
|
||||||
|
l87-88 : Je comprends pas. C'est pas included plutôt que including ?
|
||||||
|
Dans cette suite de paragraphes : Beaucoup de redondance, regarde les débuts
|
||||||
|
des deux derniers : The implementation is not yet release-ready et juste après
|
||||||
|
The implementation is not production-ready. Pareil pour les infos des deux §
|
||||||
|
précédents, j'ai l'impression de lire 2 fois de suite la même chose.
|
||||||
|
|
||||||
|
l134 : supprime la , ou le . qui sont adjacents.
|
||||||
|
|
||||||
|
# report.tex
|
||||||
|
|
||||||
|
l77 : s/whose/which/
|
||||||
|
l88 : onto the stack in reversed order iirc.
|
||||||
|
l111 : the decision of using it is up to the compiler on x86\_64 System V
|
||||||
|
l113 : s/subtracting/substracting
|
||||||
|
l153 : s/tha/that
|
||||||
|
l195 : donner un lien vers la stack-unwinding library ?
|
||||||
|
l215 pour le torbrowser, euh tu fais ça comment ? Parce c'est quand même
|
||||||
|
majoritairement un gros bout de python autour d'un firefox un tout petit peu
|
||||||
|
moddé. Il n'y a pas un autre exemple ?
|
||||||
|
l221 : s/was forced to/ had to/ pour éviter la redondance non ?
|
||||||
|
l229 : idem que pour DWARF, donner le vrai nom de l'acronyme.
|
||||||
|
l236-237 : ,\ldots{} et non , \ldots. L'avantage de passer l'argument vide,
|
||||||
|
c'est que ça te met le bon espacement après :P
|
||||||
|
|
||||||
|
## subsection 1.6 : How big are FDEs?
|
||||||
|
|
||||||
|
C'est très (trop ?) court. Si tu peux mettre l'histogramme avec pour compléter
|
||||||
|
ça serait pas mal, là on se demande à quel point il est utile de faire une
|
||||||
|
subsection pour ça.
|
||||||
|
|
||||||
|
## subsection 1.7 : Unwinding state-of-the-art
|
||||||
|
|
||||||
|
Pareil trop court je pense. Tu peux pas reparler un peu de C++ et de leur
|
||||||
|
version de l'unwinding ? Ou alors, vite fait parler du caching pour qu'on
|
||||||
|
comprenne ce qu'il se passe.
|
||||||
|
|
||||||
|
l555 : conciseness plutôt que brevity non ? brevity est assez peu courant.
|
||||||
|
l736 : justifie un peu pourquoi -O2 et pas -O3 éventuellement. On comprend pas
|
||||||
|
vraiment pourquoi tu ne fais pas du -O3.
|
||||||
|
|
||||||
|
Dans cette subsection aussi, il y a un travail pour ordonner les case du switch
|
||||||
|
selon les fréquences d'apparition des instructions DWARF ou bien c'est juste en
|
||||||
|
ordre croissant ?
|
||||||
|
|
||||||
|
l775 ; le bout de code collé à gauche, c'est un peu tout moche non ?
|
||||||
|
|
||||||
|
l807 : un néophyte ne sait pas à quoi correspond /proc. Tu perds tou·te·s les
|
||||||
|
catégoricien·ne·s là ^^
|
||||||
|
l833 : the waste of space je pense. Là ça fait bizarre à lire.
|
||||||
|
l876 : \ehelfs file ou alors to shrink the size of \eh_elfs
|
||||||
|
l880 : the major optimization that reduced ou bien The optimization that most
|
||||||
|
reduced non ?
|
||||||
|
l922 : Si c'est easy to prove, pourquoi ne pas le mettre, même en annexe ? À
|
||||||
|
moins que tu laisses un hint pour une question facile ?
|
||||||
|
l986 : natural enough setup, tu veux pas plutôt mettre legitimate ou un truc
|
||||||
|
dans ce genre ? On trouve pas ce genre de setup dans une forêt ou au fond d'un
|
||||||
|
lac ^^
|
||||||
|
l997-998 : concordance : being linked et being very light. Tu te réfères
|
||||||
|
toujours à l'intérêt du début de la phrase.
|
||||||
|
|
||||||
|
l1003 : and to implement.
|
||||||
|
l1011 : vanilla version, c'est standard en anglais t'es sûr ?
|
||||||
|
|
||||||
|
l1058 : start swapping -> explique ce que c'est, encore une fois tu as des
|
||||||
|
béotiens devant toi ^^
|
||||||
|
|
||||||
|
l119-1123 : Ok, joli nombre mais ça compare à quoi ? c'est combien sans, etc ?
|
||||||
|
l1125 : are most probably, je comprends pas. faut réorganiser les mots "are most probably due to"
|
||||||
|
ou alors en supprimer un mais là c'est weird.
|
||||||
|
|
||||||
|
l1181 : should be implemented plutôt que are necessary to implement je pense.
|
||||||
|
|
||||||
|
Ça manque d'une conclusion je pense.
|
||||||
|
|
||||||
|
Remarque générale : listings, j'aime pas, je trouve ça moche, et la coloration syntaxique est un peu nulle.
|
||||||
|
Je préfère largement minted, ça utilise pygmentize en background, et c'est assez magique :3
|
|
@ -1 +0,0 @@
|
||||||
../../report/imgs/call_stack/call_stack.png
|
|
|
@ -24,7 +24,7 @@
|
||||||
|
|
||||||
\newcommand{\valaddr}[1]{\operatorname{Addr}\left(#1\right)}
|
\newcommand{\valaddr}[1]{\operatorname{Addr}\left(#1\right)}
|
||||||
\newcommand{\valval}[1]{\operatorname{Val}\left(#1\right)}
|
\newcommand{\valval}[1]{\operatorname{Val}\left(#1\right)}
|
||||||
\newcommand{\valexpr}{\operatorname{Expr}}
|
\newcommand{\valexpr}[1]{\operatorname{Expr}\left(#1\right)}
|
||||||
|
|
||||||
\newcommand{\intermedlang}{\mathcal{I}}
|
\newcommand{\intermedlang}{\mathcal{I}}
|
||||||
|
|
||||||
|
|
|
@ -8,4 +8,4 @@
|
||||||
\newcommand{\qtodo}[1]{\colorbox{todobg}{\textcolor{todofg}{#1}}}
|
\newcommand{\qtodo}[1]{\colorbox{todobg}{\textcolor{todofg}{#1}}}
|
||||||
\newcommand{\todo}[1]{\qtodo{\textbf{TODO:}\,#1}}
|
\newcommand{\todo}[1]{\qtodo{\textbf{TODO:}\,#1}}
|
||||||
\newcommand{\qnote}[1]{\colorbox{notebg}{\textcolor{notefg}{#1}}}
|
\newcommand{\qnote}[1]{\colorbox{notebg}{\textcolor{notefg}{#1}}}
|
||||||
\newcommand{\tnote}[1]{\qnote{\textbf{NOTE:}\,#1}}
|
\newcommand{\note}[1]{\qnote{\textbf{NOTE:}\,#1}}
|
||||||
|
|
|
@ -1,5 +0,0 @@
|
||||||
all:
|
|
||||||
latexmk -xelatex -pdf slides.tex
|
|
||||||
|
|
||||||
clean:
|
|
||||||
rm -f *aux *bbl *bcf *blg *_latexmk *fls *log *out *.run.xml
|
|
Binary file not shown.
Before Width: | Height: | Size: 602 KiB |
Binary file not shown.
Before Width: | Height: | Size: 9.5 KiB |
Binary file not shown.
Before Width: | Height: | Size: 226 KiB |
Binary file not shown.
Before Width: | Height: | Size: 25 KiB |
Binary file not shown.
|
@ -1,524 +0,0 @@
|
||||||
% vim: spell spelllang=en
|
|
||||||
|
|
||||||
\documentclass[11pt,xcolor={usenames,dvipsnames}]{beamer}
|
|
||||||
\usetheme{Warsaw}
|
|
||||||
\usepackage[utf8]{inputenc}
|
|
||||||
\usepackage[english]{babel}
|
|
||||||
\usepackage[T1]{fontenc}
|
|
||||||
\usepackage{amsmath}
|
|
||||||
\usepackage{amsfonts}
|
|
||||||
\usepackage{amssymb}
|
|
||||||
\usepackage{booktabs}
|
|
||||||
\usepackage{makecell}
|
|
||||||
\usepackage{ifthen}
|
|
||||||
\usepackage{colortbl}
|
|
||||||
|
|
||||||
\usepackage{../shared/my_listings}
|
|
||||||
%\usepackage{../shared/my_hyperref}
|
|
||||||
\usepackage{../shared/specific}
|
|
||||||
\usepackage{../shared/common}
|
|
||||||
\usepackage{../shared/todo}
|
|
||||||
|
|
||||||
\usepackage{inconsolata}
|
|
||||||
\lstset{basicstyle=\footnotesize\ttfamily}
|
|
||||||
|
|
||||||
\renewcommand\theadalign{c}
|
|
||||||
\renewcommand\theadfont{\scriptsize\bfseries}
|
|
||||||
|
|
||||||
\setbeamertemplate{navigation symbols}{}
|
|
||||||
\setbeamertemplate{headline}{}
|
|
||||||
|
|
||||||
\newcommand{\thenalert}[1]{\only<1>{#1}\only<2>{\alert{#1}}}
|
|
||||||
\newcommand{\slidecountline}{
|
|
||||||
\ifthenelse{\theframenumber = 0}
|
|
||||||
{}
|
|
||||||
{\insertframenumber/\inserttotalframenumber}}
|
|
||||||
\newcommand{\sectionline}{
|
|
||||||
\ifthenelse{\thesection = 0}
|
|
||||||
{}
|
|
||||||
{\Roman{section}~-- \insertsection}}
|
|
||||||
|
|
||||||
\AtBeginSection[]{
|
|
||||||
\begin{frame}
|
|
||||||
\vfill
|
|
||||||
\centering
|
|
||||||
\begin{beamercolorbox}[sep=8pt,center,shadow=true,rounded=true]{title}
|
|
||||||
\usebeamerfont{title}\insertsectionhead\par%
|
|
||||||
\end{beamercolorbox}
|
|
||||||
\vfill
|
|
||||||
\end{frame}
|
|
||||||
}
|
|
||||||
|
|
||||||
\lstdefinelanguage{gdb}{
|
|
||||||
morekeywords={gdb},
|
|
||||||
sensitive=false,
|
|
||||||
}
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\author[\slidecountline]{Théophile \textsc{Bastian} \\
|
|
||||||
\small{Under supervision of Francesco Zappa Nardelli}}
|
|
||||||
\title[\sectionline]
|
|
||||||
{Speeding up stack unwinding by compiling DWARF debug data}
|
|
||||||
\date{March\ --\ August 2018}
|
|
||||||
%\subject{}
|
|
||||||
%\logo{}
|
|
||||||
\institute{Team PARKAS, INRIA, Paris}
|
|
||||||
|
|
||||||
\begin{document}
|
|
||||||
|
|
||||||
\begin{frame}
|
|
||||||
\addtocounter{framenumber}{-1}
|
|
||||||
\titlepage{}
|
|
||||||
|
|
||||||
\vspace{-2em}
|
|
||||||
\begin{center}
|
|
||||||
\begin{align*}
|
|
||||||
\text{Slides: } &\text{\url{https://tobast.fr/m2/slides.pdf}} \\
|
|
||||||
\text{Report: } &\text{\url{https://tobast.fr/m2/report.pdf}}
|
|
||||||
\end{align*}
|
|
||||||
\end{center}
|
|
||||||
\end{frame}
|
|
||||||
|
|
||||||
\begin{frame}{~}
|
|
||||||
\addtocounter{framenumber}{-1}
|
|
||||||
\tableofcontents[hideallsubsections]
|
|
||||||
\end{frame}
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\section{Stack unwinding data}
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\subsection{Introduction}
|
|
||||||
|
|
||||||
\begin{frame}[fragile]{We often use stack unwinding!}
|
|
||||||
\begin{columns}[c]
|
|
||||||
\begin{column}{0.70\textwidth}
|
|
||||||
\begin{lstlisting}[language=gdb, numbers=none, escapechar=|]
|
|
||||||
Program received signal SIGSEGV.
|
|
||||||
0x54625 in fct_b at segfault.c:5
|
|
||||||
5 printf("%d\n", *b);
|
|
||||||
|
|
||||||
|\pause| (gdb) backtrace
|
|
||||||
#0 0x54625 in fct_b at segfault.c:5
|
|
||||||
#1 0x54663 in fct_a at segfault.c:10
|
|
||||||
#2 0x54674 in main at segfault.c:14
|
|
||||||
|
|
||||||
|\pause| (gdb) frame 1
|
|
||||||
#1 0x54663 in fct_a at segfault.c:10
|
|
||||||
10 fct_b((int*) a);
|
|
||||||
|
|
||||||
|\pause| (gdb) print a
|
|
||||||
$1 = 84
|
|
||||||
\end{lstlisting}
|
|
||||||
\vspace{-1em}
|
|
||||||
\pause{}
|
|
||||||
\begin{center}
|
|
||||||
\textbf{\Large How does it work?!}
|
|
||||||
\end{center}
|
|
||||||
\end{column}
|
|
||||||
\begin{column}{0.35\textwidth}
|
|
||||||
\pause{}
|
|
||||||
\includegraphics[width=0.95\linewidth]{img/stack/call_stack}
|
|
||||||
\end{column}
|
|
||||||
\end{columns}
|
|
||||||
\end{frame}
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\subsection{Stack frames and unwinding}
|
|
||||||
|
|
||||||
\begin{frame}{Call stack and registers}
|
|
||||||
\begin{columns}[c]
|
|
||||||
\begin{column}{0.55\textwidth}
|
|
||||||
\begin{center}
|
|
||||||
\large\bf
|
|
||||||
How do we get the grandparent RA\@?
|
|
||||||
|
|
||||||
\medskip
|
|
||||||
|
|
||||||
Isn't it as trivial as \texttt{pop()}?
|
|
||||||
|
|
||||||
\vspace{2em}
|
|
||||||
|
|
||||||
\only<2>{We only have \reg{rsp} and \reg{rip}.}
|
|
||||||
|
|
||||||
\end{center}
|
|
||||||
\end{column}
|
|
||||||
\begin{column}{0.45\textwidth}
|
|
||||||
\includegraphics[width=0.95\linewidth]{img/stack/call_stack}
|
|
||||||
\end{column}
|
|
||||||
\end{columns}
|
|
||||||
\end{frame}
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\subsection{DWARF tables}
|
|
||||||
|
|
||||||
\newcolumntype{a}{>{\columncolor{RedOrange}}l}
|
|
||||||
|
|
||||||
\begin{frame}{DWARF unwinding data}
|
|
||||||
\vspace{2em}
|
|
||||||
\tt \footnotesize
|
|
||||||
\begin{tabular}{
|
|
||||||
>{\columncolor{YellowGreen}}l
|
|
||||||
>{\columncolor{Thistle}}l
|
|
||||||
l l l l l l
|
|
||||||
>{\columncolor{Apricot}}l}
|
|
||||||
~LOC & CFA & rbx & rbp & r12 & r13 & r14 & r15 & ra \\
|
|
||||||
0084950 & rsp+8 & u & u & u & u & u & u & c-8 \\
|
|
||||||
0084952 & rsp+16 & u & u & u & u & u & c-16 & c-8 \\
|
|
||||||
0084954 & rsp+24 & u & u & u & u & c-24 & c-16 & c-8 \\
|
|
||||||
0084956 & rsp+32 & u & u & u & c-32 & c-24 & c-16 & c-8 \\
|
|
||||||
0084958 & rsp+40 & u & u & c-40 & c-32 & c-24 & c-16 & c-8 \\
|
|
||||||
0084959 & rsp+48 & u & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
|
|
||||||
\rowcolor{Aquamarine} 008495a & rsp+56 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
|
|
||||||
0084962 & rsp+64 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
|
|
||||||
0084a19 & rsp+56 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
|
|
||||||
0084a1d & rsp+48 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
|
|
||||||
0084a1e & rsp+40 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
|
|
||||||
0084a20 & rsp+32 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
|
|
||||||
0084a22 & rsp+24 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
|
|
||||||
0084a24 & rsp+16 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
|
|
||||||
0084a26 & rsp+8 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
|
|
||||||
0084a30 & rsp+64 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
|
|
||||||
\end{tabular}
|
|
||||||
|
|
||||||
\pause{}
|
|
||||||
|
|
||||||
\vspace{-3cm}
|
|
||||||
\hfill\includegraphics[height=3cm, angle=45, origin=c]{img/dwarf_logo}
|
|
||||||
\hspace{-1cm}
|
|
||||||
\end{frame}
|
|
||||||
|
|
||||||
\begin{frame}[t, fragile]{The real DWARF}
|
|
||||||
\begin{lstlisting}[numbers=none, language=]
|
|
||||||
00009b30 48 009b34 FDE cie=0000 pc=0084950..0084b37
|
|
||||||
DW_CFA_advance_loc: 2 to 0000000000084952
|
|
||||||
DW_CFA_def_cfa_offset: 16
|
|
||||||
DW_CFA_offset: r15 (r15) at cfa-16
|
|
||||||
DW_CFA_advance_loc: 2 to 0000000000084954
|
|
||||||
DW_CFA_def_cfa_offset: 24
|
|
||||||
DW_CFA_offset: r14 (r14) at cfa-24
|
|
||||||
DW_CFA_advance_loc: 2 to 0000000000084956
|
|
||||||
DW_CFA_def_cfa_offset: 32
|
|
||||||
DW_CFA_offset: r13 (r13) at cfa-32
|
|
||||||
DW_CFA_advance_loc: 2 to 0000000000084958
|
|
||||||
DW_CFA_def_cfa_offset: 40
|
|
||||||
DW_CFA_offset: r12 (r12) at cfa-40
|
|
||||||
DW_CFA_advance_loc: 1 to 0000000000084959
|
|
||||||
[...]
|
|
||||||
\end{lstlisting}
|
|
||||||
|
|
||||||
\begin{itemize}
|
|
||||||
\item[\textbf{$\longrightarrow$}] \textbf{\alert{constructed} on-demand
|
|
||||||
by a \alert{Turing-complete bytecode}!}
|
|
||||||
\end{itemize}
|
|
||||||
|
|
||||||
\pause{}
|
|
||||||
|
|
||||||
\vspace{-5.5cm}
|
|
||||||
\begin{center}
|
|
||||||
\bf \fontsize{8cm}{1cm}\colorbox{white}{\alert{Slow!}}
|
|
||||||
\end{center}
|
|
||||||
\end{frame}
|
|
||||||
|
|
||||||
\begin{frame}{Why does slow matter?}
|
|
||||||
\begin{itemize}
|
|
||||||
|
|
||||||
\item{} After all, we're talking about \alert{debugging procedures} run
|
|
||||||
by a \alert{human being} (slower than the machine).
|
|
||||||
|
|
||||||
\ldots{}or are we?
|
|
||||||
\end{itemize}
|
|
||||||
|
|
||||||
\pause{}
|
|
||||||
\begin{center}
|
|
||||||
\textbf{\Large{}No!}
|
|
||||||
\end{center}
|
|
||||||
|
|
||||||
\begin{itemize}
|
|
||||||
\pause{}\item{} Pretty much any \alert{program analysis tool}
|
|
||||||
\pause{}\item{} \alert{Profiling} with polling profilers
|
|
||||||
|
|
||||||
\pause{}\item{} \alert{Exception handling} in C++
|
|
||||||
|
|
||||||
\end{itemize}
|
|
||||||
|
|
||||||
\vspace{2em}
|
|
||||||
|
|
||||||
\begin{center}
|
|
||||||
\textbf{\Large{}Debug data is not only for debugging}
|
|
||||||
\end{center}
|
|
||||||
\end{frame}
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\section{Compiling stack unwinding data ahead-of-time}
|
|
||||||
|
|
||||||
\subsection*{}
|
|
||||||
|
|
||||||
\begin{frame}{Compilation overview}
|
|
||||||
\begin{itemize}
|
|
||||||
\item Compiled to \alert{C code}
|
|
||||||
\item C code then \alert{compiled to native binary} (gcc)
|
|
||||||
\begin{itemize}
|
|
||||||
\item[$\leadsto$] gcc optimisations for free
|
|
||||||
\end{itemize}
|
|
||||||
\item Compiled as \alert{separate \texttt{.so} files}, called \ehelfs{}
|
|
||||||
\bigskip{}
|
|
||||||
\item Morally a \alert{monolithic switch} on IPs
|
|
||||||
\item Each case contains assembly that computes a \alert{row of the
|
|
||||||
table}
|
|
||||||
\end{itemize}
|
|
||||||
\end{frame}
|
|
||||||
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\subsection{Example}
|
|
||||||
|
|
||||||
\begin{frame}{Compilation example: original C, DWARF}
|
|
||||||
\lstinputlisting[language=C]{src/fib7/fib7.cfde}
|
|
||||||
\end{frame}
|
|
||||||
|
|
||||||
\begin{frame}[shrink]{Compilation example: generated C}
|
|
||||||
\lstinputlisting[language=C]{src/fib7/fib7.eh_elf_basic.c}
|
|
||||||
\end{frame}
|
|
||||||
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\subsection{Compilation Strategy}
|
|
||||||
|
|
||||||
\begin{frame}{Compilation choices}
|
|
||||||
\textbf{In order to keep the compiler \alert{simple} and \alert{easily
|
|
||||||
testable}, the whole DWARF5 instruction set is not supported.}
|
|
||||||
|
|
||||||
\begin{itemize}
|
|
||||||
\item Focus on \alert{x86\_64}
|
|
||||||
\item Focus on unwinding return address \\
|
|
||||||
\vspace{0.3ex}
|
|
||||||
$\leadsto$ \textit{Allows building a backtrace}
|
|
||||||
\begin{itemize}
|
|
||||||
\item \alert{suitable for perf, not for gdb}
|
|
||||||
\item Only supports \alert{unwinding registers}: \reg{rip}, \reg{rsp},
|
|
||||||
\reg{rbp}, \reg{rbx}
|
|
||||||
\item Supports the \alert{vast majority} ($> 99.9\%$) of instructions
|
|
||||||
used
|
|
||||||
\item Among \alert{4000} randomly sampled files, only \alert{24}
|
|
||||||
containing unsupported instructions
|
|
||||||
\end{itemize}
|
|
||||||
\end{itemize}
|
|
||||||
\end{frame}
|
|
||||||
|
|
||||||
\begin{frame}{Interface: libunwind}
|
|
||||||
\begin{itemize}
|
|
||||||
\item \alert{libunwind}: \textit{de facto} standard library for
|
|
||||||
unwinding
|
|
||||||
\item Relies on DWARF
|
|
||||||
|
|
||||||
\bigskip{}
|
|
||||||
|
|
||||||
\item \texttt{libunwind-eh\_elf}: alternative implementation using
|
|
||||||
\ehelfs{}
|
|
||||||
|
|
||||||
\item[$\leadsto$] \alert{alternative implementation} of libunwind,
|
|
||||||
almost plug-and-play for existing projects!
|
|
||||||
\begin{itemize}
|
|
||||||
\item[$\leadsto$] It is \alert{easy} to use \ehelfs{}: just
|
|
||||||
link against the right library!
|
|
||||||
\end{itemize}
|
|
||||||
\end{itemize}
|
|
||||||
\end{frame}
|
|
||||||
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\subsection{Outlining}
|
|
||||||
|
|
||||||
\begin{frame}{Size optimisation: outlining}
|
|
||||||
\begin{itemize}
|
|
||||||
\item This \alert{works}, but \alert{takes space}: about \alert{7 times
|
|
||||||
larger in size} than regular DWARF\@.
|
|
||||||
|
|
||||||
\item DWARF optimisation strategy: \alert{alter previous row}. \\
|
|
||||||
Causes slowness: we cannot do that.
|
|
||||||
|
|
||||||
\item Remark: a lot of lines appear often.
|
|
||||||
\begin{itemize}
|
|
||||||
\item[$\leadsto$] \textbf{\emph{outline} them!}
|
|
||||||
\end{itemize}
|
|
||||||
|
|
||||||
\pause{}
|
|
||||||
|
|
||||||
\item On libc, $20\,827$ rows $\rightarrow$ $302$ outlined ($1.5\,\%$)
|
|
||||||
\item Turn the big switch into a binary search \alert{if/else tree}
|
|
||||||
\end{itemize}
|
|
||||||
|
|
||||||
\pause{}
|
|
||||||
|
|
||||||
\bigskip{}
|
|
||||||
\begin{center}
|
|
||||||
$\leadsto$ only \textbf{2.5 times bigger than DWARF}
|
|
||||||
\end{center}
|
|
||||||
\end{frame}
|
|
||||||
|
|
||||||
\begin{frame}{Example with outlining}
|
|
||||||
\lstinputlisting[language=C]{src/fib7/fib7.eh_elf_outline.c}
|
|
||||||
\end{frame}
|
|
||||||
|
|
||||||
\subsection{A word on formalization}
|
|
||||||
|
|
||||||
\begin{frame}[t]{A word on formalization}
|
|
||||||
\begin{itemize}
|
|
||||||
\item First task: \alert{writing semantics} for DWARF, written as a
|
|
||||||
mapping to C code.
|
|
||||||
\item DWARF5 specification: \alert{plain English}, no proper semantics
|
|
||||||
\item Compiled code is in substance equivalent to semantics
|
|
||||||
\item What remains to prove is mostly \alert{simple or classic
|
|
||||||
optimisations}
|
|
||||||
\end{itemize}
|
|
||||||
|
|
||||||
\pause{}
|
|
||||||
\vspace{-3cm}
|
|
||||||
\begin{center}
|
|
||||||
\includegraphics[width=0.8\linewidth, angle=10]{img/dw_spec.png}
|
|
||||||
\end{center}
|
|
||||||
\end{frame}
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\section{Benchmarking}
|
|
||||||
|
|
||||||
\begin{frame}{Benchmarking requirements}
|
|
||||||
\begin{enumerate}
|
|
||||||
\item Thousands of samples (single unwind: $10\,\mu{}s$)
|
|
||||||
\item Interesting enough program to unwind: nested functions, complex
|
|
||||||
FDEs
|
|
||||||
\item Mitigate caching: don't always unwind from the \emph{same} point
|
|
||||||
\item Yet be fair: don't always unwind from totally different places
|
|
||||||
\item Distribute evenly: if possible, also from within libraries
|
|
||||||
\end{enumerate}
|
|
||||||
\end{frame}
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\begin{frame}{perf instrumentation}
|
|
||||||
\textbf{\alert{perf} is the state-of-the-art polling profiler for Linux.}
|
|
||||||
\begin{itemize}
|
|
||||||
\item{} used to get readings of the time spent in each function
|
|
||||||
\item{} works by regularly stopping the program, unwinding its stack,
|
|
||||||
then aggregating the gathered data
|
|
||||||
\end{itemize}
|
|
||||||
|
|
||||||
\pause{}\bigskip{}
|
|
||||||
\textbf{Instrumenting perf matches all the requirements!}
|
|
||||||
|
|
||||||
\begin{itemize}
|
|
||||||
\item{} \alert{Plug \ehelfs{} into perf}: use \ehelfs{} instead of
|
|
||||||
DWARF to unwind the stack
|
|
||||||
\item{} Implement \alert{unwinding performance counters} inside perf
|
|
||||||
\bigskip{}
|
|
||||||
|
|
||||||
\item{} Use perf on \alert{hackbench}, a kernel stress-test program
|
|
||||||
\begin{itemize}
|
|
||||||
\item Small program
|
|
||||||
\item Lots of calls
|
|
||||||
\item Relies on libc, libpthread
|
|
||||||
\end{itemize}
|
|
||||||
\end{itemize}
|
|
||||||
\end{frame}
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\section{Results}
|
|
||||||
|
|
||||||
\begin{frame}{Time performance}
|
|
||||||
\small
|
|
||||||
\centering
|
|
||||||
\begin{tabular}{l r r r r r}
|
|
||||||
\toprule
|
|
||||||
\thead{Unwinding method} & \thead{Frames \\ unwound}
|
|
||||||
& \thead{Tot.\ time \\ ($\mu s$)}
|
|
||||||
& \thead{Avg. \\ time / frame \\ ($ns$)}
|
|
||||||
& \thead{Time \\ ratio} \\
|
|
||||||
\midrule
|
|
||||||
\alert{\ehelfs{}}
|
|
||||||
& 23506 % Frames unwound
|
|
||||||
& 14837 % Total time
|
|
||||||
& 631 % Avg time
|
|
||||||
& 1
|
|
||||||
\\
|
|
||||||
\prog{libunwind}, \alert{cached}
|
|
||||||
& 27058 % Frames unwound
|
|
||||||
& 441601 % Total time
|
|
||||||
& 16320 % Avg time
|
|
||||||
& \alert{25.9}
|
|
||||||
\\
|
|
||||||
\prog{libunwind}, \alert{uncached}
|
|
||||||
& 27058 % Frames unwound
|
|
||||||
& 671292 % Total time
|
|
||||||
& 24809 % Avg time
|
|
||||||
& \alert{39.3}
|
|
||||||
\\
|
|
||||||
\bottomrule
|
|
||||||
\end{tabular}
|
|
||||||
\end{frame}
|
|
||||||
|
|
||||||
\begin{frame}{Space performance}
|
|
||||||
\begin{center}
|
|
||||||
\begin{tabular}{r r r r r r}
|
|
||||||
\toprule
|
|
||||||
\thead{Object}
|
|
||||||
& \thead{\% of binary size}
|
|
||||||
& \thead{Growth factor} \\
|
|
||||||
\midrule
|
|
||||||
libc
|
|
||||||
& 21.88 & 2.41 \\
|
|
||||||
libpthread
|
|
||||||
& 43.71 & 2.19 \\
|
|
||||||
ld
|
|
||||||
& 22.09 & 2.97 \\
|
|
||||||
hackbench
|
|
||||||
& 93.87 & 4.99 \\
|
|
||||||
\midrule
|
|
||||||
Total
|
|
||||||
& 22.81 & \alert{2.44} \\
|
|
||||||
\bottomrule
|
|
||||||
\end{tabular}
|
|
||||||
\end{center}
|
|
||||||
\end{frame}
|
|
||||||
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\section*{}
|
|
||||||
\setcounter{section}{0}
|
|
||||||
|
|
||||||
\begin{frame}{What next?}
|
|
||||||
\begin{itemize}
|
|
||||||
\item Implement a release-ready, packageable, easy to use version of
|
|
||||||
perf with \ehelfs{} and submit it for inclusion
|
|
||||||
|
|
||||||
\item{} Measure \alert{C++ exceptions overhead} precisely in common
|
|
||||||
software
|
|
||||||
|
|
||||||
\item{} Implement \alert{\ehelfs{}} support for \alert{C++ runtime}
|
|
||||||
exception handling, and other systems where unwinding is a
|
|
||||||
performance bottleneck
|
|
||||||
|
|
||||||
\medskip
|
|
||||||
|
|
||||||
\item \alert{Outlining} was effective for
|
|
||||||
compactness\ldots{} Try outlining DWARF bytecode\@?
|
|
||||||
|
|
||||||
\end{itemize}
|
|
||||||
\end{frame}
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
\begin{frame}
|
|
||||||
\vspace{5mm}
|
|
||||||
\includegraphics[width=\linewidth]{img/keep_breathing}
|
|
||||||
\vspace{-1cm}
|
|
||||||
|
|
||||||
\begin{center}
|
|
||||||
\large
|
|
||||||
\begin{align*}
|
|
||||||
\textbf{Slides: } &\text{\url{https://tobast.fr/m2/slides.pdf}} \\
|
|
||||||
\textbf{Report: } &\text{\url{https://tobast.fr/m2/report.pdf}}
|
|
||||||
\end{align*}
|
|
||||||
\end{center}
|
|
||||||
|
|
||||||
\end{frame}
|
|
||||||
|
|
||||||
\end{document}
|
|
Binary file not shown.
|
@ -1,17 +0,0 @@
|
||||||
#include <stdio.h>
|
|
||||||
|
|
||||||
void fib7() {
|
|
||||||
int fibo[8];
|
|
||||||
fibo[0] = 1;
|
|
||||||
fibo[1] = 1;
|
|
||||||
for(int pos = 2; pos < 8; ++pos)
|
|
||||||
fibo[pos] =
|
|
||||||
fibo[pos - 1]
|
|
||||||
+ fibo[pos - 2];
|
|
||||||
printf("%d\n", fibo[7]);
|
|
||||||
}
|
|
||||||
|
|
||||||
int main(void) {
|
|
||||||
fib7();
|
|
||||||
return 0;
|
|
||||||
}
|
|
|
@ -1,11 +0,0 @@
|
||||||
DWARF
|
|
||||||
CFA ra
|
|
||||||
void fib7() { 0x615 rsp+8 c-8
|
|
||||||
int fibo[8]; 0x620 rsp+48 c-8
|
|
||||||
fibo[0] = 1;
|
|
||||||
fibo[1] = 1;
|
|
||||||
for(...)
|
|
||||||
...
|
|
||||||
printf("%d\n", fibo[7]);
|
|
||||||
0x659 rsp+8 c-8
|
|
||||||
}
|
|
|
@ -1,15 +0,0 @@
|
||||||
unwind_context_t _eh_elf(
|
|
||||||
unwind_context_t ctx, uintptr_t pc)
|
|
||||||
{
|
|
||||||
unwind_context_t out_ctx;
|
|
||||||
switch(pc) {
|
|
||||||
...
|
|
||||||
case 0x615 ... 0x618:
|
|
||||||
out_ctx.rsp = ctx.rsp + 8;
|
|
||||||
out_ctx.rip =
|
|
||||||
*((uintptr_t*)(out_ctx.rsp - 8));
|
|
||||||
out_ctx.flags = 3u;
|
|
||||||
return out_ctx;
|
|
||||||
...
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,21 +0,0 @@
|
||||||
unwind_context_t _eh_elf(
|
|
||||||
unwind_context_t ctx, uintptr_t pc)
|
|
||||||
{
|
|
||||||
unwind_context_t out_ctx;
|
|
||||||
if(pc < 0x619) { ... }
|
|
||||||
else {
|
|
||||||
if(pc < 0x659) { // IP=0x619 ... 0x658
|
|
||||||
goto _factor_1;
|
|
||||||
}
|
|
||||||
...
|
|
||||||
}
|
|
||||||
|
|
||||||
_factor_1:
|
|
||||||
out_ctx.rsp = ctx.rsp + (48);
|
|
||||||
out_ctx.rip = *((uintptr_t*)(out_ctx.rsp + (-8)));
|
|
||||||
out_ctx.flags = 3u;
|
|
||||||
|
|
||||||
...
|
|
||||||
|
|
||||||
return out_ctx;
|
|
||||||
}
|
|
|
@ -1,5 +0,0 @@
|
||||||
[...] FDE [...] pc=615..65a
|
|
||||||
LOC CFA ra
|
|
||||||
0000000000000615 rsp+8 c-8
|
|
||||||
0000000000000619 rsp+48 c-8
|
|
||||||
0000000000000659 rsp+8 c-8
|
|
|
@ -1,7 +0,0 @@
|
||||||
[...] FDE [...] pc=615..65a
|
|
||||||
DW_CFA_def_cfa: r7 (rsp) ofs 8
|
|
||||||
DW_CFA_offset: r16 (rip) at cfa-8
|
|
||||||
DW_CFA_advance_loc: 4 to 0619
|
|
||||||
DW_CFA_def_cfa_offset: 48
|
|
||||||
DW_CFA_advance_loc1: 64 to 0659
|
|
||||||
DW_CFA_def_cfa_offset: 8
|
|
|
@ -1,18 +0,0 @@
|
||||||
0000000000000615 <fib7>:
|
|
||||||
615: sub $0x28,%rsp ; Alloc stack
|
|
||||||
619: movl $0x1,(%rsp) ; fibo[0]
|
|
||||||
620: movl $0x1,0x4(%rsp) ; fibo[1]
|
|
||||||
628: mov %rsp,%rax ; BEGIN FOR
|
|
||||||
62b: lea 0x18(%rax),%rcx
|
|
||||||
62f: mov (%rax),%edx
|
|
||||||
631: add 0x4(%rax),%edx
|
|
||||||
634: mov %edx,0x8(%rax)
|
|
||||||
637: add $0x4,%rax
|
|
||||||
63b: cmp %rcx,%rax
|
|
||||||
63e: jne 62f <fib7+0x1a> ; END FOR
|
|
||||||
640: mov 0x1c(%rsp),%esi
|
|
||||||
644: lea 0xb9(%rip),%rdi
|
|
||||||
64b: mov $0x0,%eax
|
|
||||||
650: callq 520 <printf@plt>
|
|
||||||
655: add $0x28,%rsp ; Restore rsp
|
|
||||||
659: retq
|
|
Binary file not shown.
|
@ -1,4 +0,0 @@
|
||||||
typedef struct {
|
|
||||||
uint8_t flags; // State (registers filled, error)
|
|
||||||
uintptr_t rip, rsp, rbp, rbx; // Registers' values
|
|
||||||
} unwind_context_t;
|
|
Loading…
Add table
Reference in a new issue