Add new OCaml files.

2017-10-11 19:47:20 +02:00 · 2017-10-11 19:47:20 +02:00 · b88b92d33c
commit b88b92d33c
parent 555822838f
3 changed files with 575 additions and 0 deletions
--- a/ocaml/EvalCBNCPS.ml
+++ b/ocaml/EvalCBNCPS.ml
@ -0,0 +1,164 @@
+(* -------------------------------------------------------------------------- *)
+
+(* The type of lambda-terms, in de Bruijn's representation. *)
+
+type var = int (* a de Bruijn index *)
+type term =
+  | Var of var
+  | Lam of (* bind: *) term
+  | App of term * term
+  | Let of (* bind: *) term * term
+
+(* -------------------------------------------------------------------------- *)
+
+(* Under a call-by-name regime, in a function call, the actual argument is not
+   evaluated immediately; instead, a thunk is built (a pair of the argument
+   and the environment in which it must be evaluated). Thus, an environment is
+   a list of thunks. As in call-by-value, a closure is a pair of a term and an
+   environment. (Closures and thunks differ in that a closure binds a
+   variable, the formal argument, in the term. A thunk does not.) *)
+
+type cvalue =
+  | Clo of (* bind: *) term * cenv
+
+and cenv =
+  thunk list
+
+and thunk =
+  | Thunk of term * cenv
+
+(* -------------------------------------------------------------------------- *)
+
+(* Environments. *)
+
+let empty : cenv =
+  []
+
+exception RuntimeError
+
+let lookup (e : cenv) (x : var) : thunk =
+  try
+    List.nth e x
+  with Failure _ ->
+    raise RuntimeError
+
+(* -------------------------------------------------------------------------- *)
+
+(* An environment-based big-step call-by-name interpreter. *)
+
+let rec eval (e : cenv) (t : term) : cvalue =
+  match t with
+  | Var x ->
+      let Thunk (t, e) = lookup e x in
+      eval e t
+  | Lam t ->
+      Clo (t, e)
+  | App (t1, t2) ->
+      let cv1 = eval e t1 in
+      let Clo (u1, e') = cv1 in
+      eval (Thunk(t2, e) :: e') u1
+  | Let (t1, t2) ->
+      eval (Thunk (t1, e) :: e) t2
+
+(* -------------------------------------------------------------------------- *)
+
+(* The CPS-transformed interpreter. *)
+
+let rec evalk (e : cenv) (t : term) (k : cvalue -> 'a) : 'a =
+  match t with
+  | Var x ->
+      let Thunk (t, e) = lookup e x in
+      evalk e t k
+  | Lam t ->
+      k (Clo (t, e))
+  | App (t1, t2) ->
+      evalk e t1 (fun cv1 ->
+      let Clo (u1, e') = cv1 in
+      evalk (Thunk(t2, e) :: e') u1 k)
+  | Let (t1, t2) ->
+      evalk (Thunk (t1, e) :: e) t2 k
+
+let eval (e : cenv) (t : term) : cvalue =
+  evalk e t (fun cv -> cv)
+
+(* -------------------------------------------------------------------------- *)
+
+(* The CPS-transformed, defunctionalized interpreter. *)
+
+type kont =
+  | AppL of { e: cenv; t2: term; k: kont }
+  | Init
+
+let rec evalkd (e : cenv) (t : term) (k : kont) : cvalue =
+  match t with
+  | Var x ->
+      let Thunk (t, e) = lookup e x in
+      evalkd e t k
+  | Lam t ->
+      apply k (Clo (t, e))
+  | App (t1, t2) ->
+      evalkd e t1 (AppL { e; t2; k })
+  | Let (t1, t2) ->
+      evalkd (Thunk (t1, e) :: e) t2 k
+
+and apply (k : kont) (cv : cvalue) : cvalue =
+  match k with
+  | AppL { e; t2; k } ->
+      let cv1 = cv in
+      let Clo (u1, e') = cv1 in
+      evalkd (Thunk(t2, e) :: e') u1 k
+  | Init ->
+      cv
+
+let eval (e : cenv) (t : term) : cvalue =
+  evalkd e t Init
+
+(* -------------------------------------------------------------------------- *)
+
+(* Because [apply] has only one call site, it can be inlined, yielding a
+   slightly more compact and readable definition. *)
+
+let rec evalkd (e : cenv) (t : term) (k : kont) : cvalue =
+  match t, k with
+  | Var x, _ ->
+      let Thunk (t, e) = lookup e x in
+      evalkd e t k
+  | Lam t, AppL { e; t2; k } ->
+      let cv1 = Clo (t, e) in
+      let Clo (u1, e') = cv1 in
+      evalkd (Thunk(t2, e) :: e') u1 k
+  | Lam t, Init ->
+      Clo (t, e)
+  | App (t1, t2), _ ->
+      evalkd e t1 (AppL { e; t2; k })
+  | Let (t1, t2), _ ->
+      evalkd (Thunk (t1, e) :: e) t2 k
+
+let eval (e : cenv) (t : term) : cvalue =
+  evalkd e t Init
+
+(* -------------------------------------------------------------------------- *)
+
+(* The type [kont] is isomorphic to [(cenv * term) list]. Using the latter
+   type makes the code slightly prettier, although slightly less efficient. *)
+
+(* At this point, one recognizes Krivine's machine. *)
+
+let rec evalkd (e : cenv) (t : term) (k : (cenv * term) list) : cvalue =
+  match t, k with
+  | Var x, _ ->
+      let Thunk (t, e) = lookup e x in
+      evalkd e t k
+  | Lam t, (e, t2) :: k ->
+      let cv1 = Clo (t, e) in
+      let Clo (u1, e') = cv1 in
+      evalkd (Thunk(t2, e) :: e') u1 k
+  | Lam t, [] ->
+      Clo (t, e)
+  | App (t1, t2), _ ->
+      evalkd e t1 ((e, t2) :: k)
+  | Let (t1, t2), _ ->
+      evalkd (Thunk (t1, e) :: e) t2 k
+
+let eval (e : cenv) (t : term) : cvalue =
+  evalkd e t []
--- a/ocaml/EvalCBVCPS.ml
+++ b/ocaml/EvalCBVCPS.ml
@ -0,0 +1,208 @@
+(* -------------------------------------------------------------------------- *)
+
+(* The type of lambda-terms, in de Bruijn's representation. *)
+
+type var = int (* a de Bruijn index *)
+type term =
+  | Var of var
+  | Lam of (* bind: *) term
+  | App of term * term
+  | Let of (* bind: *) term * term
+
+(* -------------------------------------------------------------------------- *)
+
+(* An environment-based big-step interpreter. This is the same interpreter
+   that we programmed in Coq, except here, in OCaml, fuel is not needed. *)
+
+type cvalue =
+  | Clo of (* bind: *) term * cenv
+
+and cenv =
+  cvalue list
+
+let empty : cenv =
+  []
+
+exception RuntimeError
+
+let lookup (e : cenv) (x : var) : cvalue =
+  try
+    List.nth e x
+  with Failure _ ->
+    raise RuntimeError
+
+let rec eval (e : cenv) (t : term) : cvalue =
+  match t with
+  | Var x ->
+      lookup e x
+  | Lam t ->
+      Clo (t, e)
+  | App (t1, t2) ->
+      let cv1 = eval e t1 in
+      let cv2 = eval e t2 in
+      let Clo (u1, e') = cv1 in
+      eval (cv2 :: e') u1
+  | Let (t1, t2) ->
+      eval (eval e t1 :: e) t2
+
+(* -------------------------------------------------------------------------- *)
+
+(* Term/value/environment printers. *)
+
+open Printf
+
+let rec print_term f = function
+  | Var x ->
+      fprintf f "(Var %d)" x
+  | Lam t ->
+      fprintf f "(Lam %a)" print_term t
+  | App (t1, t2) ->
+      fprintf f "(App %a %a)" print_term t1 print_term t2
+  | Let (t1, t2) ->
+      fprintf f "(Let %a %a)" print_term t1 print_term t2
+
+let rec print_cvalue f = function
+  | Clo (t, e) ->
+      fprintf f "< %a | %a >" print_term t print_cenv e
+
+and print_cenv f = function
+  | [] ->
+      fprintf f "[]"
+  | cv :: e ->
+      fprintf f "%a :: %a" print_cvalue cv print_cenv e
+
+let print_cvalue cv =
+  fprintf stdout "%a\n" print_cvalue cv
+
+(* -------------------------------------------------------------------------- *)
+
+(* A tiny test suite. *)
+
+let id =
+  Lam (Var 0)
+
+let idid =
+  App (id, id)
+
+let apply =
+  Lam (Lam (App (Var 1, Var 0)))
+
+let test1 eval t =
+  print_cvalue (eval empty t)
+
+let test name eval =
+  printf "Testing %s...\n%!" name;
+  test1 eval idid;
+  test1 eval (App (apply, id));
+  test1 eval (App (App (apply, id), id));
+  ()
+
+(* -------------------------------------------------------------------------- *)
+
+(* Test. *)
+
+let () =
+  test "the direct-style evaluator" eval
+
+(* -------------------------------------------------------------------------- *)
+
+(* A CPS-transformed, environment-based big-step interpreter. *)
+
+(* In this code, every recursive call to [evalk] is a tail call. Thus,
+   it is compiled by the OCaml compiler to a loop, and requires only O(1)
+   space on the OCaml stack. *)
+
+let rec evalk (e : cenv) (t : term) (k : cvalue -> 'a) : 'a =
+  match t with
+  | Var x ->
+      k (lookup e x)
+  | Lam t ->
+      k (Clo (t, e))
+  | App (t1, t2) ->
+      evalk e t1 (fun cv1 ->
+      evalk e t2 (fun cv2 ->
+      let Clo (u1, e') = cv1 in
+      evalk (cv2 :: e') u1 k))
+  | Let (t1, t2) ->
+      evalk e t1 (fun cv1 ->
+      evalk (cv1 :: e) t2 k)
+
+(* It is possible to explicitly assert that these calls are tail calls.
+   The compiler would tell us if we were wrong. *)
+
+let rec evalk (e : cenv) (t : term) (k : cvalue -> 'a) : 'a =
+  match t with
+  | Var x ->
+      (k[@tailcall]) (lookup e x)
+  | Lam t ->
+      (k[@tailcall]) (Clo (t, e))
+  | App (t1, t2) ->
+      (evalk[@tailcall]) e t1 (fun cv1 ->
+      (evalk[@tailcall]) e t2 (fun cv2 ->
+      let Clo (u1, e') = cv1 in
+      (evalk[@tailcall]) (cv2 :: e') u1 k))
+  | Let (t1, t2) ->
+      (evalk[@tailcall]) e t1 (fun cv1 ->
+      (evalk[@tailcall]) (cv1 :: e) t2 k)
+
+let eval (e : cenv) (t : term) : cvalue =
+  evalk e t (fun cv -> cv)
+
+(* -------------------------------------------------------------------------- *)
+
+(* Test. *)
+
+let () =
+  test "the CPS evaluator" eval
+
+(* -------------------------------------------------------------------------- *)
+
+(* The above code uses heap-allocated closures, which form a linked list in the
+   heap. In fact, the interpreter's "stack" is now heap-allocated. To see this
+   more clearly, let us defunctionalize the CPS-transformed interpreter. *)
+
+(* There are four places in the above code where an anonymous continuation is
+   built, so defunctionalization yields a data type of four possible kinds of
+   continuations. This data type describes a linked list of stack frames! *)
+
+type kont =
+  | AppL of { e: cenv; t2: term; k: kont }
+  | AppR of {       cv1: cvalue; k: kont }
+  | LetL of { e: cenv; t2: term; k: kont }
+  | Init
+
+let rec evalkd (e : cenv) (t : term) (k : kont) : cvalue =
+  match t with
+  | Var x ->
+      apply k (lookup e x)
+  | Lam t ->
+      apply k (Clo (t, e))
+  | App (t1, t2) ->
+      evalkd e t1 (AppL { e; t2; k })
+  | Let (t1, t2) ->
+      evalkd e t1 (LetL { e; t2; k })
+
+and apply (k : kont) (cv : cvalue) : cvalue =
+  match k with
+  | AppL { e; t2; k } ->
+      let cv1 = cv in
+      evalkd e t2 (AppR { cv1; k })
+  | AppR { cv1; k } ->
+      let cv2 = cv in
+      let Clo (u1, e') = cv1 in
+      evalkd (cv2 :: e') u1 k
+  | LetL { e; t2; k } ->
+      let cv1 = cv in
+      evalkd (cv1 :: e) t2 k
+  | Init ->
+      cv
+
+let eval e t =
+  evalkd e t Init
+
+(* -------------------------------------------------------------------------- *)
+
+(* Test. *)
+
+let () =
+  test "the defunctionalized CPS evaluator" eval
--- a/ocaml/Graph.ml
+++ b/ocaml/Graph.ml
@ -0,0 +1,203 @@
+open Printf
+
+(* -------------------------------------------------------------------------- *)
+
+(* A simple type of binary trees. *)
+
+type tree =
+  | Leaf
+  | Node  of { data: int; left: tree; right: tree }
+
+(* -------------------------------------------------------------------------- *)
+
+(* Constructors. *)
+
+let node data left right =
+  Node { data; left; right }
+
+let singleton data =
+  node data Leaf Leaf
+
+(* -------------------------------------------------------------------------- *)
+
+(* A sample tree. *)
+
+let christmas =
+  node 6
+    (node 2 (singleton 0) (singleton 1))
+    (node 5 (singleton 3) (singleton 4))
+
+(* -------------------------------------------------------------------------- *)
+
+(* A test procedure. *)
+
+let test name walk =
+  printf "Testing %s...\n%!" name;
+  walk christmas;
+  walk christmas;
+  flush stdout
+
+(* -------------------------------------------------------------------------- *)
+
+(* A recursive depth-first traversal, with postfix printing. *)
+
+let rec walk (t : tree) : unit =
+  match t with
+  | Leaf ->
+      ()
+  | Node { data; left; right } ->
+      walk left;
+      walk right;
+      printf "%d\n" data
+
+let () =
+  test "walk" walk
+
+(* -------------------------------------------------------------------------- *)
+
+(* A CPS traversal. *)
+
+let rec walkk (t : tree) (k : unit -> 'a) : 'a =
+  match t with
+  | Leaf ->
+      k()
+  | Node { data; left; right } ->
+      walkk left (fun () ->
+      walkk right (fun () ->
+      printf "%d\n" data;
+      k()))
+
+let walk t =
+  walkk t (fun t -> t)
+
+let () =
+  test "walkk" walk
+
+(* -------------------------------------------------------------------------- *)
+
+(* A CPS-defunctionalized traversal. *)
+
+type kont =
+  | Init
+  | GoneL of { data: int; tail: kont; right: tree }
+  | GoneR of { data: int;              tail: kont }
+
+let rec walkkd (t : tree) (k : kont) : unit =
+  match t with
+  | Leaf ->
+      apply k ()
+  | Node { data; left; right } ->
+      walkkd left (GoneL { data; tail = k; right })
+
+and apply k () =
+  match k with
+  | Init ->
+      ()
+  | GoneL { data; tail; right } ->
+      walkkd right (GoneR { data; tail })
+  | GoneR { data; tail } ->
+      printf "%d\n" data;
+      apply tail ()
+
+let walk t =
+  walkkd t Init
+
+let () =
+  test "walkkd" walk
+
+(* CPS, defunctionalized, with in-place allocation of continuations. *)
+
+(* [Init] is encoded by [Leaf].
+
+   [GoneL { data; tail; right }] is encoded by:
+   - setting [status] to [GoneL]; and
+   - storing [tail] in the [left] field.
+   - the [data] and [right] fields retain their original value.
+
+   [GoneR { data; tail }] is encoded by:
+   - setting [status] to [GoneR]; and
+   - storing [tail] in the [right] field.
+   - the [data] and [left] fields retain their original value.
+
+   The [status] field is meaningful only when the memory block is
+   being viewed as a continuation. If it is being viewed as a tree,
+   then (by convention) [status] must be [GoneL]. (We could also
+   let the type [status] have three values, but I prefer to use just
+   two, for the sake of economy.)
+
+   Does that sound crazy? Well, it is, in a way. *)
+
+type status = GoneL | GoneR
+type mtree  =  Leaf | Node of {
+    data: int;           mutable status: status;
+    mutable left: mtree; mutable right: mtree
+  }
+type mkont = mtree
+
+(* Constructors. *)
+
+let node data left right =
+  Node { data; status = GoneL; left; right }
+
+let singleton data =
+  node data Leaf Leaf
+
+(* A sample tree. *)
+
+let christmas =
+  node 6
+    (node 2 (singleton 0) (singleton 1))
+    (node 5 (singleton 3) (singleton 4))
+
+(* A test. *)
+
+let test name walk =
+  printf "Testing %s...\n%!" name;
+  walk christmas;
+  walk christmas;
+  flush stdout
+
+(* The code. *)
+
+let rec walkkdi (t : mtree) (k : mkont) : unit =
+  match t with
+  | Leaf ->
+      (* We decide to let [apply] takes a tree as a second argument,
+         instead of just a unit value. Indeed, in order to restore
+         the [left] or [right] fields of [k], we need the address
+         of the child [t] out of which we are coming. *)
+      apply k t
+  | Node ({ left; _ } as n) ->
+      (* At this point, [t] is a tree.
+         [n] is a name for its root record. *)
+      (* Change this tree to a [GoneL] continuation. *)
+      assert (n.status = GoneL);
+      n.left (* n.tail *) <- k;
+      (* [t] now represents a continuation. Go down into the left
+         child, with this continuation. *)
+      walkkdi left (t : mkont)
+
+and apply (k : mkont) (child : mtree) : unit =
+  match k with
+  | Leaf -> ()
+  | Node ({ status = GoneL; left = tail; right; _ } as n) ->
+      (* We are popping a [GoneL] frame, that is, coming out of
+         a left child. *)
+      n.status <- GoneR;       (* update continuation! *)
+      n.left <- child;    (* restore orig. left child! *)
+      n.right (* n.tail *) <- tail;
+      (* [k] now represents a [GoneR] continuation. Go down into
+         the right child, with [k] as a continuation. *)
+      walkkdi right k
+  | Node ({ data; status = GoneR; right = tail; _ } as n) ->
+      printf "%d\n" data;
+      n.status <- GoneL;     (* change back to a tree! *)
+      n.right <- child;  (* restore orig. right child! *)
+      (* [k] now represents a valid tree again. *)
+      apply tail (k : mtree)
+
+let walk t =
+  walkkdi t Leaf
+
+let () =
+  test "walkkdi" walk