From ef62c3b7e50705893f32d2a73531d4f7493cf806 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Th=C3=A9ophile=20Bastian?= <contact@tobast.fr>
Date: Thu, 20 Jun 2024 17:56:52 +0200
Subject: [PATCH] Small backports from paper

---
 manuscrit/60_staticdeps/35_rob_proof.tex |  2 +-
 manuscrit/60_staticdeps/50_eval.tex      | 36 +++++++++++-------------
 2 files changed, 17 insertions(+), 21 deletions(-)

diff --git a/manuscrit/60_staticdeps/35_rob_proof.tex b/manuscrit/60_staticdeps/35_rob_proof.tex
index 62687ee..569a8b2 100644
--- a/manuscrit/60_staticdeps/35_rob_proof.tex
+++ b/manuscrit/60_staticdeps/35_rob_proof.tex
@@ -59,7 +59,7 @@ $(i_q)_{q\in Q_p}$ are the \uops{} obtained from the decoding of $I_p$.
 
 \begin{lemma}[Distance of in-flight \uops{}]
     For any pair of instructions $(I_p,I_{p'})$, and two corresponding \uops{},
-    $(i_q,i_{q'})$ such that q \in Q_p, q' \in Q_{p'}$,
+    $(i_q,i_{q'})$ such that $q \in Q_p, q' \in Q_{p'}$,
     \[
         \operatorname{inflight}(i_q) \wedge \operatorname{inflight}(i_{q'}) \Rightarrow \distance{I_p}{I_{p'}}<R
     \]
diff --git a/manuscrit/60_staticdeps/50_eval.tex b/manuscrit/60_staticdeps/50_eval.tex
index 93fc59f..6e3294d 100644
--- a/manuscrit/60_staticdeps/50_eval.tex
+++ b/manuscrit/60_staticdeps/50_eval.tex
@@ -28,7 +28,6 @@ benchmarks, making the analysis more convenient.
 In practice, benchmarks from \cesasme{} are roughly of the following form:
 
 \begin{lstlisting}[language=C]
-/* Initialize A, B, C here */
 for(int measure=0; measure < NUM_MEASURES; ++measure) {
     measure_start();
     for(int repeat=0; repeat < NUM_REPEATS; ++repeat) {
@@ -41,7 +40,7 @@ for(int measure=0; measure < NUM_MEASURES; ++measure) {
 \end{lstlisting}
 
 While this is sensible for conducting throughput measures, it also introduces
-unwanted dependencies \todo{explain why}. If, for instance, the kernel consists in
+unwanted dependencies. If, for instance, the kernel consists in
 $A[i] = C\times{}A[i] + B[i]$, implemented by\\
 \begin{minipage}{0.95\linewidth}
 \begin{lstlisting}[language={[x86masm]Assembler}]
@@ -100,8 +99,9 @@ source and destination program counters are not in the same basic block are
 discarded, as \staticdeps{} cannot detect them by construction.
 
 For each of the considered basic blocks, we run our static analysis,
-\staticdeps{}. We discard the $\Delta{}k$ parameter, as our dynamic analysis does
-not report an equivalent parameter, but only a pair of program counters.
+\staticdeps{}. We discard the $\Delta{}k$ parameter (the number of loop
+iterations the dependency spans), as our dynamic analysis does not report an
+equivalent parameter, but only a pair of program counters.
 
 Dynamic dependencies from \depsim{} are converted to
 \emph{periodic dependencies} in the sense of \staticdeps{} as described in
@@ -281,25 +281,21 @@ the corresponding box-plots in \autoref{fig:staticdeps_uica_cesasme_boxplot}.
 
 \medskip{}
 
-The full dataset \uicadeps{} row is extremely close, on every metric, to the
-pruned, \uica{}-only row. On this basis, we argue that \staticdeps{}' addition
-to \uica{} is very conclusive: the hints provided by \staticdeps{} are
+We deduce two things from this experiment.
+
+First, the full dataset \uicadeps{} row is extremely close, on every metric, to
+the pruned, \uica{}-only row. On this basis, we argue that adding \staticdeps{}
+to \uica{} is highly effective: the hints provided by \staticdeps{} are
 sufficient to make \uica{}'s results as good on the full dataset as they were
 before on a dataset pruned of precisely the kind of dependencies we aim to
-detect. Furthermore, \uica{} and \uicadeps{}' results on the pruned dataset are
-extremely close: this further supports the accuracy of \staticdeps{}.
+detect. Thus, at least on workloads similar to Polybench, \staticdeps{} is able
+to resolve the issue of memory-carried dependencies for \uica{}'s throughput
+analysis.
 
-\medskip{}
-
-While the results obtained against \depsim{} in
-\autoref{ssec:staticdeps_eval_depsim} above were reasonable, they were not
-excellent either, and showed that many kind of dependencies were still missed
-by \staticdeps{}. However, our evaluation on \cesasme{} by enriching \uica{}
-shows that, at least on the workload considered, the dependencies that actually
-matter from a performance debugging point of view are properly found.
-
-This, however, might not be true for other kinds of applications that would
-require a dependencies analysis.
+Furthermore, \uica{} and \uicadeps{}' results on the pruned dataset are
+extremely close. From this, we argue that \staticdeps{} does not introduce
+false positives when no dependency should be found: adding \staticdeps{} to
+\uica{} does not degrade \uica{}'s accuracy when there is nothing to detect.
 
 \subsection{Analysis speed}