\title{DWARF Semantics} \author{Théophile Bastian} \date{\today} \documentclass[11pt]{article} \usepackage[left=2cm,right=2cm,top=2cm,bottom=2cm]{geometry} \usepackage{amsmath} \usepackage{amssymb} \usepackage{stmaryrd} \usepackage{mathtools} \usepackage[utf8]{inputenc} \newcommand{\dwcfa}[1]{\texttt{DW\_CFA\_#1}} \newcommand{\reg}[1]{\%#1} \newcommand{\DWARF}{\operatorname{DWARF}} \newcommand{\FDE}{\operatorname{FDE}} \newcommand{\dwrow}{\operatorname{Row}} \newcommand{\spexpr}{\mathbb{E}} \newcommand{\regs}{\mathbb{R}} \newcommand{\values}{\mathbb{V}} \newcommand{\valaddr}[1]{\operatorname{Addr}\left(#1\right)} \newcommand{\valval}[1]{\operatorname{Val}\left(#1\right)} \newcommand{\intermedlang}{\mathcal{I}} \newcommand{\semI}[2]{{\left\llbracket{} #1 \right\rrbracket}^\intermedlang_{#2}} \newcommand{\rowstack}{\mathbb{S}} \newcommand{\insarrow}[1]{\xleftarrow{\text{#1}}} \newcommand{\extrarrow}[1]{\xrightarrow{\text{#1}}} \newcommand{\contsem}[1]{\semI{d}{s}\left(#1\right)} \begin{document} \maketitle \section{DWARF CFI instructions} These instructions are up to variants (most instructions exist in multiple formats to handle various operands formatting, to optimize space). \begin{itemize} \item{} \dwcfa{set\_loc(loc)}~: new DWARF row at address $loc$ \item{} \dwcfa{advance\_loc(delta)}~: new DWARF row at address $prev\_loc + delta$ \item{} \dwcfa{def\_cfa(reg, offset)}~: sets CFA at $(\reg{reg} + offset)$ \item{} \dwcfa{def\_cfa\_register(reg)}~: sets CFA at $(\reg{reg} + prev\_offset)$ \item{} \dwcfa{def\_cfa\_offset(offset)}~: sets CFA at $(\reg{prev\_reg} + offset)$ \item{} \dwcfa{def\_cfa\_expression(expr)}~: defines CFA as the result of $expr$ \item{} \dwcfa{undefined(reg)}~: sets \reg{reg} undefined in the current (last) row \item{} \dwcfa{same\_value(reg)}~: sets \reg{reg} in the current (last) row as it was in the previous row \item{} \dwcfa{offset(reg, offset)}~: \reg{reg} is stored in memory at the address $CFA + offset$ in the current (last) row \item{} \dwcfa{val\_offset(reg, offset)}~: \reg{reg} is the value $CFA + offset$ in the current (last) row \item{} \dwcfa{register(reg, model)}~: \reg{reg} has in the current (last) row the value that had $\reg{model}$ in the previous row \item{} \dwcfa{expression(reg, expr)}~: \reg{reg} is stored in memory at the address defined by $expr$ \item{} \dwcfa{val\_expression(reg, expr)}~: \reg{reg} has the value of $expr$ \item{} \dwcfa{restore(reg)}~: \reg{reg} has the same value as in this FDE's preamble (CIE) in the current (last) row. This is \emph{not implemented in this semantics} for simplicity and brevity (we would have to introduce CIE (preamble) and FDE (body) independently). \item{} \dwcfa{remember\_state()}~: push the state of all the registers of the current (last) row in an implicit stack \item{} \dwcfa{restore\_state()}~: pop an entry of the implicit stack, and set all registers in the current (last) row accordingly. \item{} \dwcfa{nop()}~: do nothing (padding) \end{itemize} \pagebreak \section{Intermediary language $\intermedlang$} \begin{align*} \FDE &:= {\left(\mathbb{Z} \times \dwrow \right)}^{\ast} & \text{FDE (set of rows)} \\ \dwrow &:= \values ^ \regs & \text{A DWARF row} \\ \spexpr &:= \regs \times \mathbb{Z} & \text{A ``simple'' expression $\reg{reg} + offset$} \\ \regs &:= \left\{0, 1, \ldots, \operatorname{NB\_REGS - 1} \right\} & \text{Machine registers} \\ \values &:= \{\bot\} \cup \valaddr{\spexpr} \cup \valval{\spexpr} & \text{Values: undefined, at address $x$, value $x$} \\ \end{align*} \section{From DWARF to $\intermedlang$} We define $\semI{\bullet}{s} : \DWARF \times \FDE \to \FDE$, for $s$ a stack of $\dwrow$, that is, \begin{align*} s \in \rowstack &:= \dwrow^\ast \end{align*} Implicitly, $\semI{\bullet}{} := \semI{\bullet}{\varepsilon}$ For convenience, we define $\insarrow{reg}$, the operator changing the value of a register for a given value in the last row, as \[ \left(f \in \FDE\right) \insarrow{$r \in \regs$} (v \in values) \quad := \quad \left( f\left[0 \ldots |f| - 2\right] \right) \cdot \left\{ \begin{array}{r l} r' \neq r &\mapsto \left(f[-1]\right)(r') \\ r &\mapsto v \\ \end{array} \right. \] The same way, we define $\extrarrow{reg}$ that \emph{extracts} the rule currently applied for $\reg{reg}$, eg. $F \extrarrow{CFA} \valval{\reg{reg} + \text{off}}$. If the rule currently applied in such a case is \emph{not} of the form $\reg{reg} + \text{off}$, then the program is considered erroneous. More generally, we define ${\extrarrow{reg}}^{-k}$ as the same operation, but extracting in the $k$-older row, ie. ${\extrarrow{reg}}^{0}$ is the same as $\extrarrow{reg}$, and $F {\extrarrow{reg}}^{-1} \bullet$ is the same as $F\left[0 \ldots |F|-2\right] \extrarrow{reg} \bullet$. \begin{align*} \semI{\varepsilon}{s}(F) &:= F \\ \semI{\dwcfa{set\_loc(loc)} \cdot d}{s}(F) &:= \contsem{F \cdot \left(loc, F[-1].row \right)} \\ \semI{\dwcfa{adv\_loc(delta)} \cdot d}{s}(F) &:= \contsem{F \cdot \left(F[-1].addr + delta, F[-1].row \right)} \\ \semI{\dwcfa{def\_cfa(reg, offset)} \cdot d}{s}(F) &:= \contsem{F \insarrow{CFA} \valval{\reg{reg} + offset}} \\ \semI{\dwcfa{def\_cfa\_register(reg)} \cdot d}{s}(F) &:= \text{let F }\extrarrow{CFA} \valval{\reg{oldreg} + \text{oldoffset}} \text{ in} \\ &\quad \contsem{F \insarrow{CFA} \valval{\reg{reg} + oldoffset}} \\ \semI{\dwcfa{def\_cfa\_offset(offset)} \cdot d}{s}(F) &:= \text{let F }\extrarrow{CFA} \valval{\reg{oldreg} + \text{oldoffset}} \text{ in} \\ &\quad \contsem{F \insarrow{CFA} \valval{\reg{oldreg} + offset}} \\ \semI{\dwcfa{def\_cfa\_expression(expr)} \cdot d}{s}(F) &:= \text{TO BE DEFINED} &\textbf{CHECK ME?}\\ \semI{\dwcfa{undefined(reg)} \cdot d}{s}(F) &:= \contsem{F \insarrow{reg} \bot} \\ \semI{\dwcfa{same\_value(reg)} \cdot d}{s}(F) &:= \valval{\reg{reg}} &\textbf{CHECK ME}\\ \semI{\dwcfa{offset(reg, offset)} \cdot d}{s}(F) &:= \contsem{F \insarrow{reg} \valaddr{\reg{CFA} + \text{offset}}} \\ \semI{\dwcfa{val\_offset(reg, offset)} \cdot d}{s}(F) &:= \contsem{F \insarrow{reg} \valval{\reg{CFA} + \text{offset}}} \\ \semI{\dwcfa{register(reg, model)} \cdot d}{s}(F) &:= \text{let } F {\extrarrow{model}}^{-1} r \text{ in } \contsem{F \insarrow{reg} r} \\ \semI{\dwcfa{expression(reg, expr)} \cdot d}{s}(F) &:= \text{TO BE DEFINED} &\textbf{CHECK ME?}\\ \semI{\dwcfa{val\_expression(reg, expr)} \cdot d}{s}(F) &:= \text{TO BE DEFINED} &\textbf{CHECK ME?}\\ % \semI{\dwcfa{restore(reg)} \cdot d}{s}(F) &:= \\ %% NOT IMPLEMENTED \semI{\dwcfa{remember\_state()} \cdot d}{s}(F) &:= \semI{d}{s \cdot F[-1].row}\left(F\right) \\ \semI{\dwcfa{restore\_state()} \cdot d}{s \cdot t}(F) &:= \semI{d}{s}\left(F\left[0 \ldots |F|-2\right] \cdot \left(F[-1].addr, t\right) \right) \\ \semI{\dwcfa{nop()} \cdot d}{s}(F) &:= \contsem{F}\\ \end{align*} (The stack is used for \texttt{remember\_state}, \texttt{restore\_state}, \texttt{restore}) \section{From $\intermedlang$ to C} (Quite straightforward?) \end{document}