Update source code example

2018-08-03 13:59:11 +02:00 · 2018-08-03 13:59:11 +02:00 · a06b1a915a
commit a06b1a915a
parent 7a4ad4454e
7 changed files with 79 additions and 37 deletions
--- a/report/TODO
+++ b/report/TODO
@ -1,3 +1,4 @@
 - Add a sample of geenrated code?
 - Explicitly tell that we only support unwinding, and this only for a few
  registers
+- Make -ise / -ize spelling consistent (e.g. optimize)
--- a/report/report.tex
+++ b/report/report.tex
@ -43,12 +43,6 @@ Under supervision of Francesco Zappa-Nardelli\\
 %% Fiche de synthèse %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \input{fiche_synthese}

-%% Abstract %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\begin{abstract}
-    \todo{Is there a need for an abstract, given the presence above of the
-    ``fiche de synthèse''?}
-\end{abstract}
-
 %% Table of contents %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \tableofcontents

@ -185,34 +179,52 @@ valid from its start IP to the start IP of the next row, or the end IP of the
 FDE if it is the last row.

 \begin{minipage}{0.45\textwidth}
-    \lstinputlisting[language=C, firstline=3, lastline=12]
+    \lstinputlisting[language=C, firstline=3, lastline=12,
+                     caption={Original C},label={lst:ex1_c}]
        {src/fib7/fib7.c}
 \end{minipage} \hfill \begin{minipage}{0.45\textwidth}
-    \lstinputlisting[language=C]{src/fib7/fib7.fde}
+    \lstinputlisting[language=C,caption={Processed DWARF},label={lst:ex1_dw}]
+        {src/fib7/fib7.fde}
+    \lstinputlisting[language=C,caption={Raw DWARF},label={lst:ex1_dwraw}]
+        {src/fib7/fib7.raw_fde}
 \end{minipage}

-For instance, the C source code above, when compiled with \lstbash{gcc -O0
-fomit-frame-pointer}, gives the table at its right. During the function
-prelude, \ie{} for $\mhex{675} \leq \reg{rip} < \mhex{679}$, the stack frame
-only contains the return address, thus the CFA is 8 bytes above \reg{rsp}
-(which was the value of \reg{rsp} before the call), and the return address is
-precisely at \reg{rsp}. Then, 9 integers of 8 bytes each (8 for \lstc{fibo},
-one for \lstc{pos}) are allocated on the stack, which puts the CFA 80 bytes
-above \reg{rsp}, and the return address still 8 bytes below the CFA\@. Then, by
-the end of the function, the local variables are discarded and \reg{rsp} is
-reset to its value from the first row.
+\begin{minipage}{0.45\textwidth}
+    \lstinputlisting[language={[x86masm]Assembler},lastline=11,
+                     caption={Generated assembly},label={lst:ex1_asm}]
+        {src/fib7/fib7.s}
+\end{minipage} \hfill \begin{minipage}{0.45\textwidth}
+    \lstinputlisting[language={[x86masm]Assembler},firstline=12,
+                     firstnumber=last]
+        {src/fib7/fib7.s}
+\end{minipage}

-However, DWARF data isn't actually stored as a table in the binary files. The
-first row has the location of the first IP in the FDE, and must define at least
-its CFA\@. Then, when all relevant registers are defined, it is possible to
-define a new row by providing a location offset (\eg{} here $4$), and the new
-row is defined as a clone of the previous one, which can then be altered (\eg{}
-here by setting \lstc{CFA} to $\reg{rsp} + 80$). This means that every line is
-defined \wrt{} the previous one, and that the IPs of the successive rows cannot
-be determined before evaluating every row before. Thus, unwinding a frame from
-an IP close to the end of the frame will require evaluating pretty much every
-DWARF row in the table before reaching the relevant information, slowing down
-drastically the unwinding process.
+For instance, the C source code in Listing~\ref{lst:ex1_c} above, when compiled
+with \lstbash{gcc -O1 -fomit-frame-pointer -fno-stack-protector}, yields the
+assembly code in Listing~\ref{lst:ex1_asm}. When interpreting the generated
+\ehframe{} with \lstbash{readelf -wF}, we obtain the (slightly edited)
+Listing~\ref{lst:ex1_dw}. During the function prelude, \ie{} for $\mhex{615}
+\leq \reg{rip} < \mhex{619}$, the stack frame only contains the return address,
+thus the CFA is 8 bytes above \reg{rsp} (which was the value of \reg{rsp}
+before the call), and the return address is precisely at \reg{rsp}. Then, 40
+bytes are allocated on the stack (32 for the 8 integers of \lstc{fibo}, plus 8
+bytes of padding), which puts the CFA 48 bytes above \reg{rsp}, and the return
+address still 8 bytes below the CFA\@. Then, by the end of the function, the
+local variables are discarded and \reg{rsp} is reset to its value from the
+first row.
+
+However, DWARF data isn't actually stored as a table in the binary files, but
+is instead stored as in Listing~\ref{lst:ex1_dwraw}. The first row has the
+location of the first IP in the FDE, and must define at least its CFA\@. Then,
+when all relevant registers are defined, it is possible to define a new row by
+providing a location offset (\eg{} here $4$), and the new row is defined as a
+clone of the previous one, which can then be altered (\eg{} here by setting
+\lstc{CFA} to $\reg{rsp} + 48$). This means that every line is defined \wrt{}
+the previous one, and that the IPs of the successive rows cannot be determined
+without evaluating every preceding row. Thus, unwinding a frame from an IP close
+to the end of the frame will require evaluating pretty much every DWARF row in
+the table before reaching the relevant information, drastically slowing down
+the unwinding process.

 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \subsection{How big are FDEs?}
@ -565,6 +577,7 @@ The major drawback of this approach, without any particular care taken, is the
 space waste.

 \begin{table}[h]
+    \centering
    \begin{tabular}{r r r r r r}
        \toprule
        \thead{Shared object} & \thead{Original \\ program size}
--- a/report/src/fib7/Makefile
+++ b/report/src/fib7/Makefile
@ -1,4 +1,4 @@
 all: fib7.bin

 fib7.bin: fib7.c
-	gcc -O1 $< -o $@
+	gcc -fomit-frame-pointer -fno-stack-protector -O1 $< -o $@
--- a/report/src/fib7/fib7.c
+++ b/report/src/fib7/fib7.c
@ -1,6 +1,6 @@
 #include <stdio.h>

-int fib7() {
+void fib7() {
    int fibo[8];
    fibo[0] = 1;
    fibo[1] = 1;
@ -8,10 +8,10 @@ int fib7() {
        fibo[pos] =
            fibo[pos - 1]
            + fibo[pos - 2];
-    return fibo[7];
+    printf("%d\n", fibo[7]);
 }

 int main(void) {
-    printf("%d\n", fib7());
+    fib7();
    return 0;
 }
--- a/report/src/fib7/fib7.fde
+++ b/report/src/fib7/fib7.fde
@ -1,5 +1,5 @@
-[...] FDE [...] pc=675..6f3
+[...] FDE [...]  pc=615..65a
   LOC           CFA      ra
-0000000000000675 rsp+8    c-8
-0000000000000679 rsp+80   c-8
-00000000000006f2 rsp+8    c-8
+0000000000000615 rsp+8    c-8
+0000000000000619 rsp+48   c-8
+0000000000000659 rsp+8    c-8
--- a/report/src/fib7/fib7.raw_fde
+++ b/report/src/fib7/fib7.raw_fde
@ -0,0 +1,7 @@
+[...] FDE [...] pc=615..65a
+  DW_CFA_def_cfa: r7 (rsp) ofs 8
+  DW_CFA_offset: r16 (rip) at cfa-8
+  DW_CFA_advance_loc: 4 to 0619
+  DW_CFA_def_cfa_offset: 48
+  DW_CFA_advance_loc1: 64 to 0659
+  DW_CFA_def_cfa_offset: 8
--- a/report/src/fib7/fib7.s
+++ b/report/src/fib7/fib7.s
@ -0,0 +1,21 @@
+0000000000000615 <fib7>:
+ 615:	sub    $0x28,%rsp ; Alloc stack
+ 619:	movl   $0x1,(%rsp) ; fibo[0]
+ 620:	movl   $0x1,0x4(%rsp) ; fibo[1]
+ 627:
+ ; Begin for loop
+ 628:	mov    %rsp,%rax
+ 62b:	lea    0x18(%rax),%rcx
+ 62f:	mov    (%rax),%edx
+ 631:	add    0x4(%rax),%edx
+ 634:	mov    %edx,0x8(%rax)
+ 637:	add    $0x4,%rax
+ 63b:	cmp    %rcx,%rax
+ 63e:	jne    62f <fib7+0x1a>
+ ; End for loop
+ 640:	mov    0x1c(%rsp),%esi
+ 644:	lea    0xb9(%rip),%rdi
+ 64b:	mov    $0x0,%eax
+ 650:	callq  520 <printf@plt>
+ 655:	add    $0x28,%rsp ; Restore rsp
+ 659:	retq