dwarf-semantics/semantics.tex

\title{DWARF Semantics}
\author{Théophile Bastian}
\date{\today}

\documentclass[11pt]{article}

\usepackage[left=2cm,right=2cm,top=2cm,bottom=2cm]{geometry}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{stmaryrd}
\usepackage{mathtools}
\usepackage[utf8]{inputenc}

\usepackage{my_listings}
\usepackage{my_hyperref}

\newcommand{\dwcfa}[1]{\texttt{DW\_CFA\_#1}}
\newcommand{\reg}[1]{\%#1}

\newcommand{\DWARF}{\operatorname{DWARF}}

\newcommand{\FDE}{\operatorname{FDE}}
\newcommand{\dwrow}{\operatorname{Row}}
\newcommand{\spexpr}{\mathbb{E}}
\newcommand{\dwexpr}{\bar{\mathbb{E}}}
\newcommand{\regs}{\mathbb{R}}
\newcommand{\values}{\mathbb{V}}

\newcommand{\valaddr}[1]{\operatorname{Addr}\left(#1\right)}
\newcommand{\valval}[1]{\operatorname{Val}\left(#1\right)}

\newcommand{\intermedlang}{\mathcal{I}}

\newcommand{\semI}[2]{{\left\llbracket{} #1 \right\rrbracket}^\intermedlang_{#2}}
\newcommand{\semC}[1]{{\left\llbracket{} #1 \right\rrbracket}^{C}}
\newcommand{\semR}[1]{{\left\llbracket{} #1 \right\rrbracket}^{R}}

\newcommand{\rowstack}{\mathbb{S}}

\newcommand{\insarrow}[1]{\xleftarrow{\text{#1}}}
\newcommand{\extrarrow}[1]{\xrightarrow{\text{#1}}}

\newcommand{\contsem}[1]{\semI{d}{s}\left(#1\right)}

\newcommand{\ehelf}{\texttt{EH\_ELF}}

\begin{document}
\maketitle

\section{DWARF CFI instructions}

These instructions are up to variants (most instructions exist in multiple
formats to handle various operands formatting, to optimize space).

\begin{itemize}
    \item{} \dwcfa{set\_loc(loc)}~:
        new DWARF row at address $loc$
    \item{} \dwcfa{advance\_loc(delta)}~:
        new DWARF row at address $prev\_loc + delta$
    \item{} \dwcfa{def\_cfa(reg, offset)}~:
        sets CFA at $(\reg{reg} + offset)$
    \item{} \dwcfa{def\_cfa\_register(reg)}~:
        sets CFA at $(\reg{reg} + prev\_offset)$
    \item{} \dwcfa{def\_cfa\_offset(offset)}~:
        sets CFA at $(\reg{prev\_reg} + offset)$
    \item{} \dwcfa{def\_cfa\_expression(expr)}~:
        defines CFA as the result of $expr$
    \item{} \dwcfa{undefined(reg)}~:
        sets \reg{reg} undefined in the current (last) row
    \item{} \dwcfa{same\_value(reg)}~:
        sets \reg{reg} in the current (last) row as it was in the previous row
    \item{} \dwcfa{offset(reg, offset)}~:
        \reg{reg} is stored in memory at the address $CFA + offset$ in the
        current (last) row
    \item{} \dwcfa{val\_offset(reg, offset)}~:
        \reg{reg} is the value $CFA + offset$ in the current (last) row
    \item{} \dwcfa{register(reg, model)}~:
        \reg{reg} has in the current (last) row the value that had
        $\reg{model}$ in the previous row
    \item{} \dwcfa{expression(reg, expr)}~:
        \reg{reg} is stored in memory at the address defined by $expr$
    \item{} \dwcfa{val\_expression(reg, expr)}~:
        \reg{reg} has the value of $expr$
    \item{} \dwcfa{restore(reg)}~:
        \reg{reg} has the same value as in this FDE's preamble (CIE) in the
        current (last) row. This is \emph{not implemented in this semantics}
        for simplicity and brevity (we would have to introduce CIE (preamble)
        and FDE (body) independently).
    \item{} \dwcfa{remember\_state()}~:
        push the state of all the registers of the current (last) row in an
        implicit stack
    \item{} \dwcfa{restore\_state()}~:
        pop an entry of the implicit stack, and set all registers in the
        current (last) row accordingly.
    \item{} \dwcfa{nop()}~:
        do nothing (padding)
\end{itemize}

\pagebreak

\section{Intermediary language $\intermedlang$}

\begin{align*}
    \FDE &:= {\left(\mathbb{Z} \times \dwrow \right)}^{\ast}
        & \text{FDE (set of rows)} \\
    \dwrow &:= \values ^ \regs
        & \text{A DWARF row} \\
    \spexpr &:= \regs \times \mathbb{Z}
        & \text{A ``simple'' expression $\reg{reg} + offset$} \\
    \spexpr &:= \regs \times \mathbb{Z}
        & \text{A ``simple'' expression $\reg{reg} + offset$} \\
    \regs &:= \left\{0, 1, \ldots, \operatorname{NB\_REGS - 1} \right\}
        & \text{Machine registers} \\
    \values &:= \{\bot\} \cup \valaddr{\spexpr} \cup \valval{\spexpr}
        & \text{Values: undefined, at address $x$, value $x$} \\
\end{align*}


\section{From DWARF to $\intermedlang$}

We define $\semI{\bullet}{s} : \DWARF \times \FDE \to \FDE$, for $s$ a stack of
$\dwrow$, that is,

\begin{align*}
    s \in \rowstack &:= \dwrow^\ast
\end{align*}

Implicitly, $\semI{\bullet}{} := \semI{\bullet}{\varepsilon}$

For convenience, we define $\insarrow{reg}$, the operator changing the value of
a register for a given value in the last row, as

\[
    \left(f \in \FDE\right) \insarrow{$r \in \regs$} (v \in values)
    \quad := \quad
    \left( f\left[0 \ldots |f| - 2\right] \right) \cdot \left\{
        \begin{array}{r l}
            r' \neq r &\mapsto \left(f[-1]\right)(r') \\
            r &\mapsto v \\
        \end{array} \right.
\]

The same way, we define $\extrarrow{reg}$ that \emph{extracts} the rule
currently applied for $\reg{reg}$, eg. $F \extrarrow{CFA} \valval{\reg{reg} +
\text{off}}$. If the rule currently applied in such a case is \emph{not} of the
form $\reg{reg} + \text{off}$, then the program is considered erroneous.

More generally, we define ${\extrarrow{reg}}^{-k}$ as the same operation, but
extracting in the $k$-older row, ie. ${\extrarrow{reg}}^{0}$ is the same as
$\extrarrow{reg}$, and $F {\extrarrow{reg}}^{-1} \bullet$ is the same as
$F\left[0 \ldots |F|-2\right] \extrarrow{reg} \bullet$.

\begin{align*}
    \semI{\varepsilon}{s}(F) &:= F \\
    \semI{\dwcfa{set\_loc(loc)} \cdot d}{s}(F) &:=
        \contsem{F \cdot \left(loc, F[-1].row \right)} \\
    \semI{\dwcfa{adv\_loc(delta)} \cdot d}{s}(F) &:=
        \contsem{F \cdot \left(F[-1].addr + delta, F[-1].row \right)} \\
    \semI{\dwcfa{def\_cfa(reg, offset)} \cdot d}{s}(F) &:=
        \contsem{F \insarrow{CFA} \valval{\reg{reg} + offset}} \\
    \semI{\dwcfa{def\_cfa\_register(reg)} \cdot d}{s}(F) &:=
        \text{let F }\extrarrow{CFA} \valval{\reg{oldreg} + \text{oldoffset}}
        \text{ in} \\
        &\quad \contsem{F \insarrow{CFA} \valval{\reg{reg} + oldoffset}} \\
    \semI{\dwcfa{def\_cfa\_offset(offset)} \cdot d}{s}(F) &:=
        \text{let F }\extrarrow{CFA} \valval{\reg{oldreg} + \text{oldoffset}}
        \text{ in} \\
        &\quad \contsem{F \insarrow{CFA} \valval{\reg{oldreg} + offset}} \\
    \semI{\dwcfa{def\_cfa\_expression(expr)} \cdot d}{s}(F) &:=
        \text{TO BE DEFINED} &\textbf{CHECK ME?}\\
    \semI{\dwcfa{undefined(reg)} \cdot d}{s}(F) &:=
        \contsem{F \insarrow{reg} \bot} \\
    \semI{\dwcfa{same\_value(reg)} \cdot d}{s}(F) &:=
        \valval{\reg{reg}} &\textbf{CHECK ME}\\
    \semI{\dwcfa{offset(reg, offset)} \cdot d}{s}(F) &:=
        \contsem{F \insarrow{reg} \valaddr{\reg{CFA} + \text{offset}}} \\
    \semI{\dwcfa{val\_offset(reg, offset)} \cdot d}{s}(F) &:=
        \contsem{F \insarrow{reg} \valval{\reg{CFA} + \text{offset}}} \\
    \semI{\dwcfa{register(reg, model)} \cdot d}{s}(F) &:=
        \text{let } F {\extrarrow{model}}^{-1} r \text{ in }
        \contsem{F \insarrow{reg} r} \\
    \semI{\dwcfa{expression(reg, expr)} \cdot d}{s}(F) &:=
        \text{TO BE DEFINED} &\textbf{CHECK ME?}\\
    \semI{\dwcfa{val\_expression(reg, expr)} \cdot d}{s}(F) &:=
        \text{TO BE DEFINED} &\textbf{CHECK ME?}\\
%    \semI{\dwcfa{restore(reg)} \cdot d}{s}(F) &:= \\  %% NOT IMPLEMENTED
    \semI{\dwcfa{remember\_state()} \cdot d}{s}(F) &:=
        \semI{d}{s \cdot F[-1].row}\left(F\right) \\
    \semI{\dwcfa{restore\_state()} \cdot d}{s \cdot t}(F) &:=
        \semI{d}{s}\left(F\left[0 \ldots |F|-2\right] \cdot
        \left(F[-1].addr, t\right) \right) \\
    \semI{\dwcfa{nop()} \cdot d}{s}(F) &:= \contsem{F}\\
\end{align*}

(The stack is used for \texttt{remember\_state}, \texttt{restore\_state},
\texttt{restore})


\section{From $\intermedlang$ to C}

\textit{This only defines the semantics, with respect to standard C, of DWARF
as interpreted by \ehelf\@. It is not implemented this way.}

We now define $\semC{\bullet} : \DWARF \to C$.

These semantics are to be interpreted in the following context~:

\lstinputlisting[language=C]{src/c_context.c}

Then, the function \lstc{unwind_frame}, given an instruction pointer and the
state of the registers in the previous call frame, returns the state of
registers after the current call frame is unwinded. In other words, assuming
the C language is capable of formal operations, a call to
\lstc{unwind_frame(ip, {reg_0, reg_1, ..., reg_N-1})} should return something
equivalent to the corresponding DWARF row.

The translation from $\intermedlang$ to C is defined as follows:

\begin{itemize}
    \item $\semC{\varepsilon} =$ \\
        \begin{lstlisting}[language=C, mathescape=true]
            else {
                for(int reg=0; reg < NB_REGS; ++reg)
                    new_ctx[reg] = $\semR{\bot}$;
            }
        \end{lstlisting}

    \item $\semC{(\text{loc}, \text{row}) \cdot t} = C\_code + \semC{t}$, where
        $C\_code$ is
        \begin{lstlisting}[language=C, mathescape=true]
            if(ip >= $loc$) {
                for(int reg=0; reg < NB_REGS; ++reg)
                    new_ctx[reg] = $\semR{row[reg]}$;
                goto end_ifs; // Avoid if/else if problems
            }
        \end{lstlisting}
\end{itemize}

and $\semR{\bullet}$ is defined as
\begin{align*}
    \semR{\bot} &= \text{\lstc{ERROR_VALUE}} \\
    \semR{\valaddr{\text{reg}, \text{offset}}} &=
        \text{\lstc{*(old_ctx[reg] + offset)}} \\
    \semR{\valval{\text{reg}, \text{offset}}} &=
        \text{\lstc{(old_ctx[reg] + offset)}} \\
\end{align*}

\end{document}