diff --git a/ocaml/EvalCBNCPS.ml b/ocaml/EvalCBNCPS.ml new file mode 100644 index 0000000..4945eeb --- /dev/null +++ b/ocaml/EvalCBNCPS.ml @@ -0,0 +1,164 @@ +(* -------------------------------------------------------------------------- *) + +(* The type of lambda-terms, in de Bruijn's representation. *) + +type var = int (* a de Bruijn index *) +type term = + | Var of var + | Lam of (* bind: *) term + | App of term * term + | Let of (* bind: *) term * term + +(* -------------------------------------------------------------------------- *) + +(* Under a call-by-name regime, in a function call, the actual argument is not + evaluated immediately; instead, a thunk is built (a pair of the argument + and the environment in which it must be evaluated). Thus, an environment is + a list of thunks. As in call-by-value, a closure is a pair of a term and an + environment. (Closures and thunks differ in that a closure binds a + variable, the formal argument, in the term. A thunk does not.) *) + +type cvalue = + | Clo of (* bind: *) term * cenv + +and cenv = + thunk list + +and thunk = + | Thunk of term * cenv + +(* -------------------------------------------------------------------------- *) + +(* Environments. *) + +let empty : cenv = + [] + +exception RuntimeError + +let lookup (e : cenv) (x : var) : thunk = + try + List.nth e x + with Failure _ -> + raise RuntimeError + +(* -------------------------------------------------------------------------- *) + +(* An environment-based big-step call-by-name interpreter. *) + +let rec eval (e : cenv) (t : term) : cvalue = + match t with + | Var x -> + let Thunk (t, e) = lookup e x in + eval e t + | Lam t -> + Clo (t, e) + | App (t1, t2) -> + let cv1 = eval e t1 in + let Clo (u1, e') = cv1 in + eval (Thunk(t2, e) :: e') u1 + | Let (t1, t2) -> + eval (Thunk (t1, e) :: e) t2 + +(* -------------------------------------------------------------------------- *) + +(* The CPS-transformed interpreter. *) + +let rec evalk (e : cenv) (t : term) (k : cvalue -> 'a) : 'a = + match t with + | Var x -> + let Thunk (t, e) = lookup e x in + evalk e t k + | Lam t -> + k (Clo (t, e)) + | App (t1, t2) -> + evalk e t1 (fun cv1 -> + let Clo (u1, e') = cv1 in + evalk (Thunk(t2, e) :: e') u1 k) + | Let (t1, t2) -> + evalk (Thunk (t1, e) :: e) t2 k + +let eval (e : cenv) (t : term) : cvalue = + evalk e t (fun cv -> cv) + +(* -------------------------------------------------------------------------- *) + +(* The CPS-transformed, defunctionalized interpreter. *) + +type kont = + | AppL of { e: cenv; t2: term; k: kont } + | Init + +let rec evalkd (e : cenv) (t : term) (k : kont) : cvalue = + match t with + | Var x -> + let Thunk (t, e) = lookup e x in + evalkd e t k + | Lam t -> + apply k (Clo (t, e)) + | App (t1, t2) -> + evalkd e t1 (AppL { e; t2; k }) + | Let (t1, t2) -> + evalkd (Thunk (t1, e) :: e) t2 k + +and apply (k : kont) (cv : cvalue) : cvalue = + match k with + | AppL { e; t2; k } -> + let cv1 = cv in + let Clo (u1, e') = cv1 in + evalkd (Thunk(t2, e) :: e') u1 k + | Init -> + cv + +let eval (e : cenv) (t : term) : cvalue = + evalkd e t Init + +(* -------------------------------------------------------------------------- *) + +(* Because [apply] has only one call site, it can be inlined, yielding a + slightly more compact and readable definition. *) + +let rec evalkd (e : cenv) (t : term) (k : kont) : cvalue = + match t, k with + | Var x, _ -> + let Thunk (t, e) = lookup e x in + evalkd e t k + | Lam t, AppL { e; t2; k } -> + let cv1 = Clo (t, e) in + let Clo (u1, e') = cv1 in + evalkd (Thunk(t2, e) :: e') u1 k + | Lam t, Init -> + Clo (t, e) + | App (t1, t2), _ -> + evalkd e t1 (AppL { e; t2; k }) + | Let (t1, t2), _ -> + evalkd (Thunk (t1, e) :: e) t2 k + +let eval (e : cenv) (t : term) : cvalue = + evalkd e t Init + +(* -------------------------------------------------------------------------- *) + +(* The type [kont] is isomorphic to [(cenv * term) list]. Using the latter + type makes the code slightly prettier, although slightly less efficient. *) + +(* At this point, one recognizes Krivine's machine. *) + +let rec evalkd (e : cenv) (t : term) (k : (cenv * term) list) : cvalue = + match t, k with + | Var x, _ -> + let Thunk (t, e) = lookup e x in + evalkd e t k + | Lam t, (e, t2) :: k -> + let cv1 = Clo (t, e) in + let Clo (u1, e') = cv1 in + evalkd (Thunk(t2, e) :: e') u1 k + | Lam t, [] -> + Clo (t, e) + | App (t1, t2), _ -> + evalkd e t1 ((e, t2) :: k) + | Let (t1, t2), _ -> + evalkd (Thunk (t1, e) :: e) t2 k + +let eval (e : cenv) (t : term) : cvalue = + evalkd e t [] diff --git a/ocaml/EvalCBVCPS.ml b/ocaml/EvalCBVCPS.ml new file mode 100644 index 0000000..1bba7eb --- /dev/null +++ b/ocaml/EvalCBVCPS.ml @@ -0,0 +1,208 @@ +(* -------------------------------------------------------------------------- *) + +(* The type of lambda-terms, in de Bruijn's representation. *) + +type var = int (* a de Bruijn index *) +type term = + | Var of var + | Lam of (* bind: *) term + | App of term * term + | Let of (* bind: *) term * term + +(* -------------------------------------------------------------------------- *) + +(* An environment-based big-step interpreter. This is the same interpreter + that we programmed in Coq, except here, in OCaml, fuel is not needed. *) + +type cvalue = + | Clo of (* bind: *) term * cenv + +and cenv = + cvalue list + +let empty : cenv = + [] + +exception RuntimeError + +let lookup (e : cenv) (x : var) : cvalue = + try + List.nth e x + with Failure _ -> + raise RuntimeError + +let rec eval (e : cenv) (t : term) : cvalue = + match t with + | Var x -> + lookup e x + | Lam t -> + Clo (t, e) + | App (t1, t2) -> + let cv1 = eval e t1 in + let cv2 = eval e t2 in + let Clo (u1, e') = cv1 in + eval (cv2 :: e') u1 + | Let (t1, t2) -> + eval (eval e t1 :: e) t2 + +(* -------------------------------------------------------------------------- *) + +(* Term/value/environment printers. *) + +open Printf + +let rec print_term f = function + | Var x -> + fprintf f "(Var %d)" x + | Lam t -> + fprintf f "(Lam %a)" print_term t + | App (t1, t2) -> + fprintf f "(App %a %a)" print_term t1 print_term t2 + | Let (t1, t2) -> + fprintf f "(Let %a %a)" print_term t1 print_term t2 + +let rec print_cvalue f = function + | Clo (t, e) -> + fprintf f "< %a | %a >" print_term t print_cenv e + +and print_cenv f = function + | [] -> + fprintf f "[]" + | cv :: e -> + fprintf f "%a :: %a" print_cvalue cv print_cenv e + +let print_cvalue cv = + fprintf stdout "%a\n" print_cvalue cv + +(* -------------------------------------------------------------------------- *) + +(* A tiny test suite. *) + +let id = + Lam (Var 0) + +let idid = + App (id, id) + +let apply = + Lam (Lam (App (Var 1, Var 0))) + +let test1 eval t = + print_cvalue (eval empty t) + +let test name eval = + printf "Testing %s...\n%!" name; + test1 eval idid; + test1 eval (App (apply, id)); + test1 eval (App (App (apply, id), id)); + () + +(* -------------------------------------------------------------------------- *) + +(* Test. *) + +let () = + test "the direct-style evaluator" eval + +(* -------------------------------------------------------------------------- *) + +(* A CPS-transformed, environment-based big-step interpreter. *) + +(* In this code, every recursive call to [evalk] is a tail call. Thus, + it is compiled by the OCaml compiler to a loop, and requires only O(1) + space on the OCaml stack. *) + +let rec evalk (e : cenv) (t : term) (k : cvalue -> 'a) : 'a = + match t with + | Var x -> + k (lookup e x) + | Lam t -> + k (Clo (t, e)) + | App (t1, t2) -> + evalk e t1 (fun cv1 -> + evalk e t2 (fun cv2 -> + let Clo (u1, e') = cv1 in + evalk (cv2 :: e') u1 k)) + | Let (t1, t2) -> + evalk e t1 (fun cv1 -> + evalk (cv1 :: e) t2 k) + +(* It is possible to explicitly assert that these calls are tail calls. + The compiler would tell us if we were wrong. *) + +let rec evalk (e : cenv) (t : term) (k : cvalue -> 'a) : 'a = + match t with + | Var x -> + (k[@tailcall]) (lookup e x) + | Lam t -> + (k[@tailcall]) (Clo (t, e)) + | App (t1, t2) -> + (evalk[@tailcall]) e t1 (fun cv1 -> + (evalk[@tailcall]) e t2 (fun cv2 -> + let Clo (u1, e') = cv1 in + (evalk[@tailcall]) (cv2 :: e') u1 k)) + | Let (t1, t2) -> + (evalk[@tailcall]) e t1 (fun cv1 -> + (evalk[@tailcall]) (cv1 :: e) t2 k) + +let eval (e : cenv) (t : term) : cvalue = + evalk e t (fun cv -> cv) + +(* -------------------------------------------------------------------------- *) + +(* Test. *) + +let () = + test "the CPS evaluator" eval + +(* -------------------------------------------------------------------------- *) + +(* The above code uses heap-allocated closures, which form a linked list in the + heap. In fact, the interpreter's "stack" is now heap-allocated. To see this + more clearly, let us defunctionalize the CPS-transformed interpreter. *) + +(* There are four places in the above code where an anonymous continuation is + built, so defunctionalization yields a data type of four possible kinds of + continuations. This data type describes a linked list of stack frames! *) + +type kont = + | AppL of { e: cenv; t2: term; k: kont } + | AppR of { cv1: cvalue; k: kont } + | LetL of { e: cenv; t2: term; k: kont } + | Init + +let rec evalkd (e : cenv) (t : term) (k : kont) : cvalue = + match t with + | Var x -> + apply k (lookup e x) + | Lam t -> + apply k (Clo (t, e)) + | App (t1, t2) -> + evalkd e t1 (AppL { e; t2; k }) + | Let (t1, t2) -> + evalkd e t1 (LetL { e; t2; k }) + +and apply (k : kont) (cv : cvalue) : cvalue = + match k with + | AppL { e; t2; k } -> + let cv1 = cv in + evalkd e t2 (AppR { cv1; k }) + | AppR { cv1; k } -> + let cv2 = cv in + let Clo (u1, e') = cv1 in + evalkd (cv2 :: e') u1 k + | LetL { e; t2; k } -> + let cv1 = cv in + evalkd (cv1 :: e) t2 k + | Init -> + cv + +let eval e t = + evalkd e t Init + +(* -------------------------------------------------------------------------- *) + +(* Test. *) + +let () = + test "the defunctionalized CPS evaluator" eval diff --git a/ocaml/Graph.ml b/ocaml/Graph.ml new file mode 100644 index 0000000..3b4f08c --- /dev/null +++ b/ocaml/Graph.ml @@ -0,0 +1,203 @@ +open Printf + +(* -------------------------------------------------------------------------- *) + +(* A simple type of binary trees. *) + +type tree = + | Leaf + | Node of { data: int; left: tree; right: tree } + +(* -------------------------------------------------------------------------- *) + +(* Constructors. *) + +let node data left right = + Node { data; left; right } + +let singleton data = + node data Leaf Leaf + +(* -------------------------------------------------------------------------- *) + +(* A sample tree. *) + +let christmas = + node 6 + (node 2 (singleton 0) (singleton 1)) + (node 5 (singleton 3) (singleton 4)) + +(* -------------------------------------------------------------------------- *) + +(* A test procedure. *) + +let test name walk = + printf "Testing %s...\n%!" name; + walk christmas; + walk christmas; + flush stdout + +(* -------------------------------------------------------------------------- *) + +(* A recursive depth-first traversal, with postfix printing. *) + +let rec walk (t : tree) : unit = + match t with + | Leaf -> + () + | Node { data; left; right } -> + walk left; + walk right; + printf "%d\n" data + +let () = + test "walk" walk + +(* -------------------------------------------------------------------------- *) + +(* A CPS traversal. *) + +let rec walkk (t : tree) (k : unit -> 'a) : 'a = + match t with + | Leaf -> + k() + | Node { data; left; right } -> + walkk left (fun () -> + walkk right (fun () -> + printf "%d\n" data; + k())) + +let walk t = + walkk t (fun t -> t) + +let () = + test "walkk" walk + +(* -------------------------------------------------------------------------- *) + +(* A CPS-defunctionalized traversal. *) + +type kont = + | Init + | GoneL of { data: int; tail: kont; right: tree } + | GoneR of { data: int; tail: kont } + +let rec walkkd (t : tree) (k : kont) : unit = + match t with + | Leaf -> + apply k () + | Node { data; left; right } -> + walkkd left (GoneL { data; tail = k; right }) + +and apply k () = + match k with + | Init -> + () + | GoneL { data; tail; right } -> + walkkd right (GoneR { data; tail }) + | GoneR { data; tail } -> + printf "%d\n" data; + apply tail () + +let walk t = + walkkd t Init + +let () = + test "walkkd" walk + +(* CPS, defunctionalized, with in-place allocation of continuations. *) + +(* [Init] is encoded by [Leaf]. + + [GoneL { data; tail; right }] is encoded by: + - setting [status] to [GoneL]; and + - storing [tail] in the [left] field. + - the [data] and [right] fields retain their original value. + + [GoneR { data; tail }] is encoded by: + - setting [status] to [GoneR]; and + - storing [tail] in the [right] field. + - the [data] and [left] fields retain their original value. + + The [status] field is meaningful only when the memory block is + being viewed as a continuation. If it is being viewed as a tree, + then (by convention) [status] must be [GoneL]. (We could also + let the type [status] have three values, but I prefer to use just + two, for the sake of economy.) + + Does that sound crazy? Well, it is, in a way. *) + +type status = GoneL | GoneR +type mtree = Leaf | Node of { + data: int; mutable status: status; + mutable left: mtree; mutable right: mtree + } +type mkont = mtree + +(* Constructors. *) + +let node data left right = + Node { data; status = GoneL; left; right } + +let singleton data = + node data Leaf Leaf + +(* A sample tree. *) + +let christmas = + node 6 + (node 2 (singleton 0) (singleton 1)) + (node 5 (singleton 3) (singleton 4)) + +(* A test. *) + +let test name walk = + printf "Testing %s...\n%!" name; + walk christmas; + walk christmas; + flush stdout + +(* The code. *) + +let rec walkkdi (t : mtree) (k : mkont) : unit = + match t with + | Leaf -> + (* We decide to let [apply] takes a tree as a second argument, + instead of just a unit value. Indeed, in order to restore + the [left] or [right] fields of [k], we need the address + of the child [t] out of which we are coming. *) + apply k t + | Node ({ left; _ } as n) -> + (* At this point, [t] is a tree. + [n] is a name for its root record. *) + (* Change this tree to a [GoneL] continuation. *) + assert (n.status = GoneL); + n.left (* n.tail *) <- k; + (* [t] now represents a continuation. Go down into the left + child, with this continuation. *) + walkkdi left (t : mkont) + +and apply (k : mkont) (child : mtree) : unit = + match k with + | Leaf -> () + | Node ({ status = GoneL; left = tail; right; _ } as n) -> + (* We are popping a [GoneL] frame, that is, coming out of + a left child. *) + n.status <- GoneR; (* update continuation! *) + n.left <- child; (* restore orig. left child! *) + n.right (* n.tail *) <- tail; + (* [k] now represents a [GoneR] continuation. Go down into + the right child, with [k] as a continuation. *) + walkkdi right k + | Node ({ data; status = GoneR; right = tail; _ } as n) -> + printf "%d\n" data; + n.status <- GoneL; (* change back to a tree! *) + n.right <- child; (* restore orig. right child! *) + (* [k] now represents a valid tree again. *) + apply tail (k : mtree) + +let walk t = + walkkdi t Leaf + +let () = + test "walkkdi" walk