mpri-funcprog-project/coq/ClosureConversion.v
2017-10-05 17:57:33 +02:00

350 lines
13 KiB
Coq

Require Import List.
Require Import MyList.
Require Import MyTactics.
Require Import Sequences.
Require Import LambdaCalculusSyntax.
Require Import LambdaCalculusFreeVars.
Require Import LambdaCalculusValues.
Require Import LambdaCalculusBigStep.
Require Import MetalSyntax.
Require Import MetalBigStep.
(* This file contains a definition of closure conversion and a proof of
semantic preservation. Closure conversion is a transformation of the
lambda-calculus into a lower-level calculus, nicknamed Metal, where
lambda-abstractions must be closed. *)
(* The definition is slightly painful because we are using de Bruijn indices.
(That said, it is likely that using named variables would be painful too,
just in other ways.) One important pattern that we follow is that, as soon
as a variable [x] is no longer useful, we use [eos x _] to make it go out
of scope. Following this pattern is not just convenient -- it is actually
necessary. *)
(* The main transformation function, [cc], has type [nat -> term -> metal]. If
[t] is a source term with [n] free variables, then [cc n t] is its image
under the transformation. *)
(* -------------------------------------------------------------------------- *)
(* -------------------------------------------------------------------------- *)
(* The auxiliary function [cc_lam n cct] defines the transformation of
the body of a lambda-abstraction. This auxiliary function must be isolated
because it is used twice, once within [cc], and once within [cc_value]. *)
(* The parameter [n] is supposed to be the number of free variables of the
lambda-abstraction, and the parameter [cct] is the body of the transformed
lambda-abstraction. That is, [cct] is expected to be later instantiated
with [cc (n + 1) t], where [t] is the body of the source
lambda-abstraction. *)
(* The term [cc_lam n cct] has just one free variable, namely 0, which
represents the formal parameter of the transformed lambda-abstraction. This
parameter is expected to be a pair [(clo, x)], where [clo] is the closure
and [x] is the formal parameter of the source lambda-abstraction. *)
(* In nominal / informal syntax, the code would be roughly:
let (clo, x) = _ in
let x_1, ..., x_n = pi_1 clo, ..., pi_n clo in
cct
*)
(* We use meta-level [let] bindings, such as [let clo_x := 0 in ...].
These bindings generate no Metal code -- they should not be confused
with Metal [MLet] bindings. They are just a notational convenience. *)
Definition cc_lam (n : nat) (cct : metal) : metal :=
(* Let us refer to variable 0 as [clo_x]. *)
let clo_x := 0 in
(* Initially, the variable [clo_x] is bound to a pair of [clo] and [x].
Decompose this pair, then let [clo_x] go out of scope. *)
MLetPair (* clo, x *) (MVar clo_x) (
let clo_x := clo_x + 2 in
eos clo_x (
(* Two variables are now in scope, namely [clo] and [x]. *)
let clo := 1 in
let x := 0 in
(* Now, bind [n] variables, informally referred to as [x_1, ..., x_n],
to the [n] tuple projections [pi_1 clo, ..., pi_n clo]. Then, let
[clo] go out of scope, as it is no longer needed. *)
MMultiLet 0 (MProjs n (MVar clo)) (
let clo := n + clo in
let x := n + x in
eos clo (
(* We need [x] to correspond to de Bruijn index 0. Currently, this is
not the case. So, rebind a new variable to [x], and let the old [x]
go out of scope. (Yes, this is a bit subtle.) We use an explicit
[MLet] binding, but in principle, could also use a substitution. *)
MLet (MVar x) (
let x := 1 + x in
eos x (
(* We are finally ready to enter the transformed function body. *)
cct
)))))).
(* [cc] is the main transformation function. *)
Fixpoint cc (n : nat) (t : term) : metal :=
match t with
| Var x =>
(* A variable is mapped to a variable. *)
MVar x
| Lam t =>
(* A lambda-abstraction [Lam t] is mapped to a closure, that is, a tuple
whose 0-th component is a piece of code (a closed lambda-abstraction)
and whose remaining [n] components are the [n] variables numbered 0,
1, ..., n-1. *)
MTuple (
MLam (cc_lam n (cc (n + 1) t)) ::
MVars n
)
| App t1 t2 =>
(* A function application is transformed into a closure invocation. *)
(* Bind [clo] to the closure produced by the (transformed) term [t1]. *)
MLet (cc n t1) (
let clo := 0 in
(* Bind [code] to the 0-th component of this closure. *)
MLet (MProj 0 (MVar clo)) (
let clo := 1 + clo in
let code := 0 in
(* Apply the function [code] to a pair of the closure and the value
produced by the (transformed) term [t2]. Note that both [clo] and
[code] must go out of scope before referring to [t2]. This could
be done in one step by applying the renaming [(+2)], but we prefer
to explicitly use [eos] twice, for greater clarity. *)
MApp
(MVar code)
(MPair
(MVar clo)
(eos clo (eos code (cc n t2)))
)
))
| Let t1 t2 =>
(* A local definition is translated to a local definition. *)
MLet (cc n t1) (cc (n + 1) t2)
end.
(* -------------------------------------------------------------------------- *)
(* -------------------------------------------------------------------------- *)
(* In order to prove that the above program transformation is sound (that is,
semantics-preserving), we must relate the values computed by the source
program with the values computed by the target program. Fortunately, the
relation is simple. (It is, in fact, a function.) The image of a source
closure [Clo t e] under the translation is a closure, represented as a
tuple, whose 0-th component is a lambda-abstraction -- the translation
of [Lam t] -- and whose remaining [n] components are the translation of
the environment [e]. *)
Fixpoint cc_cvalue (cv : cvalue) : mvalue :=
match cv with
| Clo t e =>
let n := length e in
MVTuple (
MVLam (cc_lam n (cc (n + 1) t)) ::
map cc_cvalue e
)
end.
(* The translation of environments is defined pointwise. *)
Definition cc_cenv e :=
map cc_cvalue e.
(* -------------------------------------------------------------------------- *)
(* -------------------------------------------------------------------------- *)
(* This lemma and hint increase the power of the tactic [length]. *)
Lemma length_cc_env:
forall e,
length (cc_cenv e) = length e.
Proof.
intros. unfold cc_cenv. length.
Qed.
Hint Rewrite length_cc_env : length.
(* In this file, we allow [eauto with omega] to use the tactic [length]. *)
Local Hint Extern 1 (_ = _ :> nat) => length : omega.
Local Hint Extern 1 (lt _ _) => length : omega.
(* -------------------------------------------------------------------------- *)
(* -------------------------------------------------------------------------- *)
(* We now check two purely syntactic properties of the transformation. First,
if the source program [t] has [n] free variables, then the transformed
program [cc n t] has [n] free variables, too. Second, in the transformed
program, every lambda-abstraction is closed. *)
(* These proofs are "easy" and "routine", but could have been fairly lengthy
and painful if we had not set up appropriate lemmas and tactics, such as
[fv], beforehand. *)
Lemma fv_cc_lam:
forall m n t,
fv (S n) t ->
1 <= m ->
fv m (cc_lam n t).
Proof.
intros. unfold cc_lam. fv; length. repeat split;
eauto using fv_monotonic with omega.
{ eapply fv_MProjs. fv. omega. }
Qed.
Lemma fv_cc:
forall t n1 n2,
fv n2 t ->
n1 = n2 ->
fv n1 (cc n2 t).
Proof.
induction t; intros; subst; simpl; fv; unpack;
repeat rewrite Nat.add_1_r in *;
repeat split; eauto using fv_monotonic, fv_cc_lam with omega.
Qed.
(* -------------------------------------------------------------------------- *)
(* The following lemmas help reorganize our view of the environment. They are
typically used just before applying the lemma [mbigcbv_eos], that is, when
a variable [x] is about to go out of scope. We then want to organize the
environment so that it has the shape [e1 ++ mv :: e2], where the length of
[e1] is [x], so [mv] is the value that is dropped, and the environment
afterwards is [e1 ++ e2]. *)
Local Lemma access_1:
forall {A} xs (x0 x : A),
x0 :: x :: xs = (x0 :: nil) ++ x :: xs.
Proof.
reflexivity.
Qed.
Local Lemma access_2:
forall {A} xs (x0 x1 x : A),
x0 :: x1 :: x :: xs = (x0 :: x1 :: nil) ++ x :: xs.
Proof.
reflexivity.
Qed.
Local Lemma access_n_plus_1:
forall {A} xs (x : A) ys,
xs ++ x :: ys = (xs ++ x :: nil) ++ ys.
Proof.
intros. rewrite <- app_assoc. reflexivity.
Qed.
(* -------------------------------------------------------------------------- *)
(* We now establish a (forward) semantic preservation statement: if under
environment [e] the source term [t] computes the value [cv], then under
the translation of [e], the translation of [t] computes the translation
of [cv]. *)
(* The proof is fairly routine and uninteresting; it is just a matter of
checking that everything works as expected. A crucial point that helps
keep the proof manageable is that we have defined auxiliary evaluation
lemmas for tuples, projections, [MMultiLet], and so on. *)
(* Although the proof is not "hard", it would be difficult to do by hand, as
it is quite deep and one must keep track of many details, such as the shape
of the runtime environment at every program point -- that is, which de
Bruijn index is bound to which value. An off-by-one error in a de Bruijn
index, or a list that is mistakenly reversed, would be difficult to detect
in a handwritten proof. Machine assistance is valuable in this kind of
exercise. *)
Theorem semantic_preservation:
forall e t cv,
ebigcbv e t cv ->
forall n,
n = length e ->
mbigcbv (cc_cenv e) (cc n t) (cc_cvalue cv).
Proof.
induction 1; intros; simpl.
(* Case: [Var]. Nothing much to say; apply the evaluation rule and
check that its side conditions hold. *)
{ econstructor.
{ length. }
{ subst cv.
erewrite (nth_indep (cc_cenv e)) by length.
unfold cc_cenv. rewrite map_nth. eauto. }
}
(* Case: [Lam]. We have to check that a transformed lambda-abstraction
(as per [cc]) evaluates to a transformed closure (as per [cc_value]).
This is fairly easy. *)
{ subst. econstructor.
(* The code component. We must check that it is closed. *)
{ econstructor.
assert (fv (length e + 1) t).
{ eapply (use_fv_length_cons _ dummy_cvalue); eauto. }
unfold closed. fv. eauto using fv_cc_lam, fv_cc with omega. }
(* The remaining components. *)
{ eapply MBigcbvTuple.
rewrite <- length_cc_env. eauto using MBigcbvVars. }
}
(* Case: [App]. This is where most of the action takes place. We must
essentially "execute" the calling sequence and check that it works
as expected. *)
{ (* Evaluate the first [Let], which binds a variable [clo] to the closure. *)
econstructor. { eauto. }
(* Evaluate the second [Let], which projects the code out of the closure,
and binds the variable [code]. *)
econstructor.
{ econstructor; [| eauto ].
econstructor; simpl; eauto with omega. }
(* We are now looking at a function application. Evaluate in turn the function,
its actual argument, and the call itself. *)
econstructor.
(* The function. *)
{ econstructor; simpl; eauto with omega. }
(* Its argument. *)
{ econstructor.
{ econstructor; simpl; eauto with omega. }
{ (* Skip two [eos] constructs. *)
rewrite access_1. eapply mbigcbv_eos; [ simpl | length ].
eapply mbigcbv_eos with (e1 := nil); [ simpl | length ].
eauto. }
}
(* The call itself. Here, we step into a transformed lambda-abstraction, and
we begin studying how it behaves when executed. *)
unfold cc_lam at 2.
(* Evaluate [MLetPair], which binds [clo] and [x]. *)
eapply MBigcbvLetPair.
{ econstructor; simpl; eauto with omega. }
(* Skip [eos]. *)
rewrite access_2. eapply mbigcbv_eos; [ simpl | eauto ].
(* Evaluate [MMultiLet]. *)
eapply MBigcbvMultiLetProjs.
{ econstructor; simpl; eauto with omega. }
{ length. }
(* Skip [eos]. *)
rewrite access_n_plus_1. eapply mbigcbv_eos; [| length ].
rewrite app_nil_r, Nat.add_0_r.
(* Evaluate the new binding of [x]. *)
econstructor.
{ econstructor.
{ length. }
{ rewrite app_nth by (rewrite map_length; omega). simpl. eauto. }
}
(* Skip [eos]. *)
rewrite app_comm_cons. eapply mbigcbv_eos; [ rewrite app_nil_r | length ].
(* Evaluate the function body. *)
eauto with omega. }
(* Case: [Let]. Immediate. *)
{ econstructor; eauto with omega. }
Qed.