Speedup: gzip -> perf gzip

Unw data slow: not by dft. -> must dump to disk
What does this imply: enhance
2019-10-19 08:17:15 +02:00 · 2019-10-19 08:09:22 +02:00 · 2019-10-19 07:56:54 +02:00 · 2019-10-19 07:18:00 +02:00 · 2019-10-19 07:14:07 +02:00 · 2019-10-19 07:13:48 +02:00
5 changed files with 90 additions and 46 deletions
--- a/img/call_stack.png
+++ b/img/call_stack.png
--- a/img/call_stack.xcf
+++ b/img/call_stack.xcf
--- a/slides.tex
+++ b/slides.tex
@ -36,9 +36,9 @@
 \newcommand{\cmark}{\color{OliveGreen}\ding{52}}
 \newcommand{\xmark}{\color{BrickRed}\ding{56}}

-\let\tempone\itemize
-\let\temptwo\enditemize
-\renewenvironment{itemize}{\tempone\addtolength{\itemsep}{0.5\baselineskip}}{\temptwo}
+%\let\tempone\itemize
+%\let\temptwo\enditemize
+%\renewenvironment{itemize}{\tempone\addtolength{\itemsep}{0.5\baselineskip}}{\temptwo}



@ -176,7 +176,7 @@ Segmentation fault.
            \end{lstlisting}
            \pause{}
            \begin{center}
-                \textbf{\Large How does it work?!}
+                \textbf{\Large How does it work?}
            \end{center}
        \end{column}
        \begin{column}{0.35\textwidth}
@ -194,7 +194,8 @@ Segmentation fault.
        \begin{column}{0.65\textwidth}
            \begin{center}
                \large\bf
-                How do we get the RA\@?
+                How do we get\\
+                the return address?

                \vspace{2em}

@ -273,22 +274,25 @@ Segmentation fault.
    \vfill

    \begin{itemize}
-        \item[\textbf{$\longrightarrow$}] \textbf{\alert{constructed} on-demand
-            by a \alert{Turing-complete stack machine}!}
+            \bf
+        \item[\textbf{$\longrightarrow$}] \alert{bytecode} for a
+            \alert{Turing-complete stack machine}
+        \item[\textbf{$\longrightarrow$}] which is \alert{interpreted on
+            demand at runtime}\\to reconstruct the table
    \end{itemize}
 \end{frame}

 \begin{frame}{What does this imply?}
-    Your compiler actually generates codes for \alert{two machines}: your
-        processor and the DWARF VM\@.
+    Your compiler actually generates code for \alert{two machines}:\\
+    your processor and the DWARF VM\@.

    \vfill{}
    \begin{columns}
-        \begin{column}{0.5\textwidth}
+        \begin{column}{0.45\textwidth}
            \begin{center}
                \begin{tikzpicture}
                    \begin{scope}[every node/.style={rectangle,thick,draw,scale=0.95}]
-                        \node (cmd) at (0, 2.5) {
+                        \node (cmd) at (0, 3.0) {
                            \lstbash{\$ gcc -S foo.c}
                        };
                        \node (asm) at (0, 0) {
@ -300,25 +304,36 @@ Segmentation fault.
                        \path [->] (cmd) -- (asm);
                    \end{scope}
                \end{tikzpicture}
-                \medskip{}
+                %\vspace{0.2em}

-                \textbf{The \lstc{.cfi_*} is inline DWARF!}
+                \textbf{\lstc{.cfi_*}: \alert{inline DWARF!}}
            \end{center}
        \end{column}

-        \begin{column}{0.5\textwidth}
-            \begin{center}
-                \bf
-                $\implies$ if you write inline asm, you must write inline DWARF!
-            \end{center}
+        \begin{column}{0.55\textwidth}
+            \begin{itemize}
+                \item[$\implies$] \alert{Cumbersome} to generate for the
+                    \alert{compiler}
+                    \begin{itemize}
+                        \item[$\leadsto$] might do it wrong
+                        \item[$\leadsto$] might not do it at all
+                    \end{itemize}

-            In \prog{glibc}, \prog{lowlevellock.h}:
-            \alert{off by one error in unwinding data}.
+                \item[$\implies$] If you write \alert{inline asm}, \alert{you} must write
+                    inline DWARF\@!
+            \end{itemize}

-            \lstinputlisting[language=gdb,numbers=none]{src/lowlevellock_backtrace}
        \end{column}
    \end{columns}
+\end{frame}

+\begin{frame}
+    \todo{Actually add lowlevellock code}
+
+    In \prog{glibc}, \prog{lowlevellock.h}:
+    \alert{off-by-one error in unwinding data}.
+
+    \lstinputlisting[language=gdb,numbers=none]{src/lowlevellock_backtrace}

    \only<2->{
        \begin{textblock*}{0.90\textwidth}[0.5,0](0.5\paperwidth,0.10\paperheight)%
@ -393,7 +408,7 @@ Segmentation fault.
 \end{frame}

 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\section{Correct by construction unwinding tables: synthesis}
+\section{Correctness by construction:\\*\textbf{synthesis of unwinding tables}}
 \sectiontitleframe{}

 \newcommand{\tblrowval}[4]{#1 & #2 & \only<2->{#3} & \only<2->{#4} \\}
@ -424,15 +439,16 @@ Segmentation fault.


    {\only<3>{
-		\begin{textblock*}{0.90\textwidth}[0.5,0.5](0.5\paperwidth,0.5\paperheight)%
+		\begin{textblock*}{\textwidth}[0.5,0.5](0.5\paperwidth,0.5\paperheight)%
 			\begin{tcolorbox}[halign=center, colframe=red, colback=Lavender]
 				\large
-				\alert{\bf Assumptions:}
+				\alert{\bf Assumptions}
+                \vspace{0.6em}
                \begin{itemize}
-                    \item the assembly is was generated by a compiler
-                    \item which also generated unwinding data
-                    \item and I have a reliable DWARF interpreter
+                    \item the compiler generated the unwinding data
+                    \item we have a reliable DWARF interpreter
                \end{itemize}
+
 			\end{tcolorbox}
 		\end{textblock*}
 	}}
@ -482,7 +498,8 @@ Segmentation fault.
                \item Heuristic to decide whether we index with \reg{rbp} or
                    \reg{rsp}
            \end{itemize}
-        \item By performing symbolic execution, we can \alert{synthesize the
+        \item Using \alert{symbolic execution} with abstract semantics,\\
+            we can \alert{synthesize the
            unwinding table} line by line.
        \item Control flow: forward data-flow analysis
         \item The fixpoints are immediate, cf.\ the article
@ -518,8 +535,10 @@ Segmentation fault.

    \pause{}

-    So much that \prog{perf} doesn't use it by default! You must use
-    \lstbash{perf report --call-graph dwarf} explicitly!
+    So much that \prog{perf} cannot unwind online!
+
+    It must \alert{copy to disk the whole call stack} every few instants and
+    \alert{analyze it later} at report time!
 \end{frame}

 \sectiontitleframe{}
@ -579,12 +598,12 @@ Segmentation fault.

 \begin{frame}
    \begin{itemize}
-        \item \alert{libunwind}: \textit{de facto} standard library for
+        \item \alert{libunwind}: most common library for
            unwinding

            \bigskip{}

-        \item \texttt{libunwind-eh\_elf}: alternative implementation using
+        \item \alert{\texttt{libunwind-eh\_elf}}: modified version to support
            \ehelfs{}

        \item[$\leadsto$] Same API, almost \alert{``relink-and-play''} for existing projects!
@ -597,15 +616,11 @@ Segmentation fault.

 \begin{frame}{Performance}
    \begin{center}
-        \Large\bf Speedup vs. libunwind:
-        \begin{columns}
-            \begin{column}{0.5\textwidth}
-                \alert{x15} on gzip
-            \end{column}
-            \begin{column}{0.5\textwidth}
-                \alert{x25} on hackbench
-            \end{column}
-        \end{columns}
+        \Large\bf Unwinding speedup vs.\ libunwind:
+        \begin{tabular}{rl}
+            \alert{x15} &on \prog{\tt{}perf gzip}\\
+            \alert{x25} &on \prog{\tt{}perf hackbench}\\
+        \end{tabular}
    \end{center}

    \vfill
@ -635,8 +650,8 @@ Segmentation fault.
        \item{} Synthesis + compare = verification of unwinding data!
        \item{} Integrate synthesis into compilers \& debuggers\\
            $\rightarrow$ support for inline assembly, fallback method, \ldots
-        \item{} Integrate into \prog{perf} for a faster analysis
-        \item{} Probably many more cool things to do!
+        \item{} Integrate into \prog{perf} for online unwinding
+        \item{} Probably many more cool projects!
    \end{itemize}

    \vspace{1em}
@ -715,15 +730,15 @@ Segmentation fault.

                \vspace{1em}

-                We cannot hope for an invariant\dots\\
+                We cannot hope for a simple invariant\dots\\
                but the compiler cannot
                either.

                \vspace{1em}

                {
-                    \large\alert{$\implies$} the compiler will \alert{fallback to
-                    \reg{rbp}}\\
+                    \large\alert{$\implies$} the compiler will\\
+                        \alert{fall back to \reg{rbp}}\\
                }
                 even with \lstbash{-fomit-frame-pointer}
            \end{center}
--- a/src/main_cfi.s
+++ b/src/main_cfi.s
@ -6,3 +6,5 @@ pushq	%rbp
 movq	%rsp, %rbp
 .cfi_def_cfa_register 6
 subq	$32, %rsp
+movl    %edi, -20(%rbp)
+movq    %rsi, -32(%rbp)
--- a/src/main_cfi_full.s
+++ b/src/main_cfi_full.s
@ -0,0 +1,27 @@
+main:
+.LFB6:
+        .cfi_startproc
+        pushq   %rbp
+        .cfi_def_cfa_offset 16
+        .cfi_offset 6, -16
+        movq    %rsp, %rbp
+        .cfi_def_cfa_register 6
+        subq    $32, %rsp
+        movl    %edi, -20(%rbp)
+        movq    %rsi, -32(%rbp)
+        movq    -32(%rbp), %rax
+        addq    $8, %rax
+        movq    (%rax), %rax
+        movq    %rax, %rdi
+        call    atoi@PLT
+        movl    %eax, -4(%rbp)
+        movl    -4(%rbp), %eax
+        leave
+        .cfi_def_cfa 7, 8
+        ret
+        .cfi_endproc
+.LFE6:
+        .size   main, .-main
+        .ident  "GCC: (GNU) 9.2.0"
+        .section        .note.GNU-stack,"",@progbits
+
Author	SHA1	Message	Date
Théophile Bastian	6791c53069	Speedup: `gzip` -> `perf gzip`	2019-10-19 08:17:15 +02:00
Théophile Bastian	21ca3e2605	Unw data slow: not by dft. -> must dump to disk	2019-10-19 08:09:22 +02:00
Théophile Bastian	24f53aa0de	What does this imply: enhance	2019-10-19 07:56:54 +02:00
Théophile Bastian	660852ce0d	Reword "Constructed on demand..."	2019-10-19 07:18:00 +02:00
Théophile Bastian	d95da5ab8c	Changes made during rehearsal w/ Francesco	2019-10-19 07:14:07 +02:00
Théophile Bastian	cfd73303e6	Call stack img: inline return addrs	2019-10-19 07:13:48 +02:00