diff --git a/Makefile b/Makefile index e22c835..98639b2 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,9 @@ +.PHONY: html clean + +html: README.html + +clean: + rm -f README.html *~ README.html: README.md pandoc -s -f markdown -t html -o $@ $< diff --git a/README.md b/README.md index 3957978..b751950 100644 --- a/README.md +++ b/README.md @@ -50,16 +50,26 @@ reasoning by induction on the structure of types. Finally, we introduce subtyping, row polymorphism, and illustrate the problems induced by side effects (references) and the need for the value restriction. -The third part of the course describes more advanced features of type -systems: exceptions and effect handlers, including their typechecking and -static analyses: type inference, data flow and control flow analyses. -Finally, it introduces dependent types and refinement types. +The third part of the course introduces "rich" type systems designed +to guarantee extra properties in addition to safety: principality, +information hiding, modularity, extensionality, purity, control of +effects, algorithmic invariants, complexity, resource usage, or full +functional correctness. The expressivity of these systems sometimes +endangers the tractability, or even the feasibility, of type checking +and type inference: a common thread between these lectures discusses +the tradeoffs made on the design of these systems to balance +expressivity and tractability. The last part focuses on the use of dependent types for programming: effectful programming with monads and algebraic effects; tagless interpreters; programming with total functions; generic programming. We also show the limits of dependently-typed functional programming. +## Project + +The [programming project](project/) is now available! +The deadline is **Friday, February 16, 2018**. + ## Approximate syllabus ### Functional Programming: Under the Hood @@ -131,13 +141,16 @@ We also show the limits of dependently-typed functional programming. * See exercises in [course notes](http://gallium.inria.fr/~remy/mpri/cours.pdf) -### Advanced Aspects of Type Systems +### Rich types, tractable typing -* Exceptions and effect handlers. (Compiled away via CPS.) -* Typechecking exceptions and handlers. -* Type inference. (ML. Bidirectional. Elaboration.) -* Data/control flow analysis. -* Functional correctness. Intro to dependent/refinement types. +* (08/12/2017) + [Introduction](slides/yrg-00-introduction.pdf), + [ML and Type inference](slides/yrg-01-type-inference.pdf) +* Subtyping +* Effects and resources +* Modules +* Dependent types +* Functional correctness ### Dependently-typed Functional Programming @@ -151,7 +164,7 @@ We also show the limits of dependently-typed functional programming. Two written exams (a partial exam on Friday Dec 1 and a final exam) and one programming project or several programming exercises are used to evaluate -the students that follow the full course. Only the partial exam will count +the students that follow the full course. Only the partial exam will count to grade students who split the course. Only course notes and hand written notes are allowed for the exams. diff --git a/project/src/.merlin b/project/src/.merlin new file mode 100644 index 0000000..7de32a3 --- /dev/null +++ b/project/src/.merlin @@ -0,0 +1,9 @@ +S kremlin +S alphalib +B _build +B _build/kremlin +B _build/alphalib +PKG unix +PKG process +PKG pprint +PKG ppx_deriving.std diff --git a/project/src/CPS.ml b/project/src/CPS.ml new file mode 100644 index 0000000..9b3d87d --- /dev/null +++ b/project/src/CPS.ml @@ -0,0 +1,7 @@ +(* The source calculus. *) +module S = Lambda +(* The target calculus. *) +module T = Tail + +let cps_term (t : S.term) : T.term = + assert false diff --git a/project/src/CPS.mli b/project/src/CPS.mli new file mode 100644 index 0000000..2497ddd --- /dev/null +++ b/project/src/CPS.mli @@ -0,0 +1,4 @@ +(* Through a CPS transformation, the surface language [Lambda] is translated + down to the intermediate language [Tail]. *) + +val cps_term: Lambda.term -> Tail.term diff --git a/project/src/Cook.ml b/project/src/Cook.ml new file mode 100644 index 0000000..89657ed --- /dev/null +++ b/project/src/Cook.ml @@ -0,0 +1,54 @@ +open Error + +(* The source calculus. *) +module S = RawLambda +(* The target calculus. *) +module T = Lambda + +(* Environments map strings to atoms. *) +module Env = + Map.Make(String) + +(* [bind env x] creates a fresh atom [a] and extends the environment [env] + with a mapping of [x] to [a]. *) +let bind env x = + let a = Atom.fresh x in + Env.add x a env, a + +let rec cook_term env { S.place; S.value } = + match value with + | S.Var x -> + begin try + T.Var (Env.find x env) + with Not_found -> + error place "Unbound variable: %s" x + end + | S.Lam (x, t) -> + let env, x = bind env x in + T.Lam (T.NoSelf, x, cook_term env t) + | S.App (t1, t2) -> + T.App (cook_term env t1, cook_term env t2) + | S.Lit i -> + T.Lit i + | S.BinOp (t1, op, t2) -> + T.BinOp (cook_term env t1, op, cook_term env t2) + | S.Print t -> + T.Print (cook_term env t) + | S.Let (S.NonRecursive, x, t1, t2) -> + let t1 = cook_term env t1 in + let env, x = bind env x in + let t2 = cook_term env t2 in + T.Let (x, t1, t2) + | S.Let (S.Recursive, f, { S.value = S.Lam (x, t1); _ }, t2) -> + let env, f = bind env f in + let x, t1 = + let env, x = bind env x in + x, cook_term env t1 + in + let t2 = cook_term env t2 in + T.Let (f, T.Lam (T.Self f, x, t1), t2) + | S.Let (S.Recursive, _, { S.place; _ }, _) -> + error place "the right-hand side of 'let rec' must be a lambda-abstraction" + +let cook_term t = + cook_term Env.empty t diff --git a/project/src/Cook.mli b/project/src/Cook.mli new file mode 100644 index 0000000..617e408 --- /dev/null +++ b/project/src/Cook.mli @@ -0,0 +1,15 @@ +(* This module translates [RawLambda] into [Lambda]. *) + +(* This involves ensuring that every name is properly bound (otherwise, an + error is reported) and switching from a representation of names as strings + to a representation of names as atoms. *) + +(* This also involves checking that the right-hand side of every [let] + construct is a function (otherwise, an error is reported) and switching + from a representation where [let] constructs can carry a [rec] annotation + to a representation where functions can carry such an annotation. *) + +(* This also involves dropping places (that is, source code locations), since + they are no longer used after this phase. *) + +val cook_term: RawLambda.term -> Lambda.term diff --git a/project/src/Defun.ml b/project/src/Defun.ml new file mode 100644 index 0000000..7264dac --- /dev/null +++ b/project/src/Defun.ml @@ -0,0 +1,7 @@ +(* The source calculus. *) +module S = Tail +(* The target calculus. *) +module T = Top + +let defun_term (t : S.term) : T.program = + assert false diff --git a/project/src/Defun.mli b/project/src/Defun.mli new file mode 100644 index 0000000..0c02815 --- /dev/null +++ b/project/src/Defun.mli @@ -0,0 +1,4 @@ +(* Through defunctionalization, the intermediate language [Tail] is translated + down to the next intermediate language, [Top]. *) + +val defun_term: Tail.term -> Top.program diff --git a/project/src/Error.ml b/project/src/Error.ml new file mode 100644 index 0000000..e4f1d8b --- /dev/null +++ b/project/src/Error.ml @@ -0,0 +1,40 @@ +open Lexing + +type place = + position * position + +let place lexbuf : place = + lexbuf.lex_start_p, lexbuf.lex_curr_p + +let line p : int = + p.pos_lnum + +let column p : int = + p.pos_cnum - p.pos_bol + +let show place : string = + let startp, endp = place in + Printf.sprintf "File \"%s\", line %d, characters %d-%d" + startp.pos_fname + (line startp) + (column startp) + (endp.pos_cnum - startp.pos_bol) (* intentionally [startp.pos_bol] *) + +let display continuation header place format = + Printf.fprintf stderr "%s:\n" (show place); + Printf.kfprintf + continuation + stderr + (header ^^ format ^^ "\n%!") + +let error place format = + display + (fun _ -> exit 1) + "Error: " + place format + +let set_filename lexbuf filename = + lexbuf.lex_curr_p <- { lexbuf.lex_curr_p with pos_fname = filename } + +let pp_place formatter _place = + Format.fprintf formatter "<>" diff --git a/project/src/Error.mli b/project/src/Error.mli new file mode 100644 index 0000000..b8ba5f9 --- /dev/null +++ b/project/src/Error.mli @@ -0,0 +1,30 @@ +open Lexing + +(* A place is a pair of a start position and an end position. *) + +type place = + position * position + +(* [set_filename lexbuf filename] updates [lexbuf] to record the + fact that the current file name is [filename]. This file name + is later used in error messages. *) + +val set_filename: lexbuf -> string -> unit + +(* [place lexbuf] produces a pair of the current token's start and + end positions. This function is useful when reporting an error + during lexing. *) + +val place: lexbuf -> place + +(* [error place format ...] displays an error message and exits. + The error message is located at [place]. The error message + is composed based on [format] and the extra arguments [...]. *) + +val error: place -> ('a, out_channel, unit, 'b) format4 -> 'a + +(* [pp_place formatter place] prints a place. It is used by + [@@deriving show] for data structures that contain places. + As of now, it prints nothing. *) + +val pp_place: Format.formatter -> place -> unit diff --git a/project/src/Finish.ml b/project/src/Finish.ml new file mode 100644 index 0000000..08779b3 --- /dev/null +++ b/project/src/Finish.ml @@ -0,0 +1,323 @@ +(* The source calculus. *) +module S = Top +(* The target calculus. *) +module T = C + +(* -------------------------------------------------------------------------- *) + +(* [interval i j f] constructs the list [[f i; f (i + 1); ...; f (j - 1)]]. *) + +let rec interval i j (f : int -> 'a) : 'a list = + if i < j then + f i :: interval (i + 1) j f + else + [] + +(* -------------------------------------------------------------------------- *) + +(* [index xs] constructs a list of pairs, where each element of [xs] is paired + with its index. Indices are 0-based. *) + +let index (xs : 'a list) : (int * 'a) list = + let n = List.length xs in + let indices = interval 0 n (fun i -> i) in + List.combine indices xs + +(* -------------------------------------------------------------------------- *) + +(* The number of fields of a block, not counting its tag. *) + +let block_num_fields b = + match b with + | S.Con (_, vs) -> + List.length vs + +(* -------------------------------------------------------------------------- *) + +(* A simple-minded way of ensuring that every atom is printed as a + distinct string is to concatenate the atom's hint and identity, + with an underscore in between. This is guaranteed to rule out + collisions. *) + +let var (x : S.variable) : T.ident = + Printf.sprintf "%s_%d" (Atom.hint x) (Atom.identity x) + +let evar (x : S.variable) : T.expr = + T.Name (var x) + +(* -------------------------------------------------------------------------- *) + +(* Predefined C types and functions. *) + +(* A universal type: every value is translated to a C value of type [univ]. + This is a union type (i.e., an untagged sum) of integers and pointers to + memory blocks. *) + +let univ : T.type_spec = + T.Named "univ" + +(* The type of integers. *) + +let int : T.type_spec = + T.Named "int" + +(* The type [char] appears in the type of [main]. *) + +let char : T.type_spec = + T.Named "char" + +let answer : T.type_spec = + int + (* Our functions never actually return, since they are tail recursive. + We use [int] as their return type, since this is the return type of + [main]. *) + +let exit : T.expr = + T.Name "exit" + +let printf : T.expr = + T.Name "printf" + +(* -------------------------------------------------------------------------- *) + +(* [declare x init] constructs a local variable declaration for a variable [x] + of type [univ]. [x] is optionally initialized according to [init]. *) + +let declare (x : S.variable) (init : T.init option) : T.declaration = + univ, None, [ T.Ident (var x), init ] + +(* -------------------------------------------------------------------------- *) + +(* Macro invocations. *) + +let macro m es : T.expr = + (* We disguise a macro invocation as a procedure call. *) + T.Call (T.Name m, es) + +(* -------------------------------------------------------------------------- *) + +(* Integer literals; conversions between [univ] and [int]. *) + +let iconst i : T.expr = + T.Constant (Constant.Int64, string_of_int i) + +let to_int v : T.expr = + macro "TO_INT" [ v ] + (* This is an unsafe conversion, of course. *) + +let from_int v : T.expr = + macro "FROM_INT" [ v ] + +(* -------------------------------------------------------------------------- *) + +(* The translation of values. *) + +let finish_op = function + | S.OpAdd -> + T.K.Add + | S.OpSub -> + T.K.Sub + | S.OpMul -> + T.K.Mult + | S.OpDiv -> + T.K.Div + +let rec finish_value (v : S.value) : T.expr = + match v with + | S.VVar x -> + evar x + | S.VLit i -> + from_int (iconst i) + | S.VBinOp (v1, op, v2) -> + from_int ( + T.Op2 ( + finish_op op, + to_int (finish_value v1), + to_int (finish_value v2) + ) + ) + +let finish_values vs = + List.map finish_value vs + +(* -------------------------------------------------------------------------- *) + +(* A macro for allocating a memory block. *) + +let alloc b : T.expr = + T.Call (T.Name "ALLOC", [ iconst (block_num_fields b) ]) + +(* -------------------------------------------------------------------------- *) + +(* Macros for reading and initializing the tag of a memory block. *) + +let read_tag (v : S.value) : T.expr = + macro "GET_TAG" [ finish_value v ] + +let set_tag (x : S.variable) (tag : S.tag) : T.stmt = + T.Expr (macro "SET_TAG" [ evar x; iconst tag ]) + +(* -------------------------------------------------------------------------- *) + +(* Macros for reading and setting a field in a memory block. *) + +let read_field (v : S.value) (i : int) : T.expr = + (* [i] is a 0-based field index. *) + macro "GET_FIELD" [ finish_value v; iconst i ] + +let read_field (v : S.value) (i, x) (t : T.stmt list) : T.stmt list = + (* [x] is a variable, which is declared and initialized with + the content of the [i]th field of the block [v]. *) + T.DeclStmt (declare x (Some (T.InitExpr (read_field v i)))) :: + t + +let read_fields (v : S.value) xs (t : T.stmt list) : T.stmt list = + (* [xs] are variables, which are declared and initialized with + the contents of the fields of the block [v]. *) + List.fold_right (read_field v) (index xs) t + +let set_field x i (v : S.value) : T.stmt = + T.Expr (macro "SET_FIELD" [ evar x; iconst i; finish_value v ]) + +(* -------------------------------------------------------------------------- *) + +(* A sequence of instructions for initializing a memory block. *) + +let init_block (x : S.variable) (b : S.block) : T.stmt list = + match b with + | S.Con (tag, vs) -> + T.Comment "Initializing a memory block:" :: + set_tag x tag :: + List.mapi (set_field x) vs + +(* -------------------------------------------------------------------------- *) + +(* Function calls, as expressions and as statements. *) + +let ecall f args : T.expr = + T.Call (f, args) + +let scall f args : T.stmt = + T.Expr (ecall f args) + +(* -------------------------------------------------------------------------- *) + +(* The translation of terms. *) + +let rec finish_term (t : S.term) : C.stmt = + match t with + | S.Exit -> + T.Compound [ + scall exit [ iconst 0 ] + ] + | S.TailCall (f, vs) -> + T.Return (Some (ecall (evar f) (finish_values vs))) + | S.Print (v, t) -> + T.Compound [ + scall printf [ T.Literal "%d\\n"; to_int (finish_value v) ]; + finish_term t + ] + | S.LetVal (x, v1, t2) -> + T.Compound [ + T.DeclStmt (declare x (Some (T.InitExpr (finish_value v1)))); + finish_term t2 + ] + | S.LetBlo (x, b1, t2) -> + T.Compound ( + T.DeclStmt (declare x (Some (T.InitExpr (alloc b1)))) :: + init_block x b1 @ + [ finish_term t2 ] + ) + | S.Swi (v, bs) -> + T.Switch ( + read_tag v, + finish_branches v bs, + default + ) + +and default : T.stmt = + (* This default [switch] branch should never be taken. *) + T.Compound [ + scall printf [ T.Literal "Oops! A nonexistent case has been taken.\\n" ]; + scall exit [ iconst 42 ]; + ] + +and finish_branches v bs = + List.map (finish_branch v) bs + +and finish_branch v (S.Branch (tag, xs, t)) : T.expr * T.stmt = + iconst tag, + T.Compound (read_fields v xs [finish_term t]) + +(* -------------------------------------------------------------------------- *) + +(* Function declarations. *) + +(* We distinguish the function [main], whose type is imposed by the C standard, + and ordinary functions, whose parameters have type [univ]. *) + +(* A parameter of an ordinary function has type [univ]. *) + +let param (x : S.variable) : T.param = + univ, T.Ident (var x) + +(* A declaration of an ordinary function. *) + +let declare_ordinary_function f xs : T.declaration = + answer, None, [ T.Function (None, T.Ident (var f), List.map param xs), None ] + +(* The declaration of the main function. *) + +let declare_main_function : T.declaration = + let params = [ + int, T.Ident "argc"; + char, T.Pointer (T.Pointer (T.Ident "argv")) + ] in + int, None, [ T.Function (None, T.Ident "main", params), None ] + +(* -------------------------------------------------------------------------- *) + +(* A function definition. *) + +type decl_or_fun = + T.declaration_or_function + +let define (decl : T.declaration) (t : S.term) : decl_or_fun = + T.Function ( + [], (* no comments *) + false, (* not inlined *) + decl, + T.Compound [finish_term t] + ) + +let define_ordinary_function (S.Fun (f, xs, t)) : decl_or_fun = + define (declare_ordinary_function f xs) t + +let define_main_function (t : S.term) : decl_or_fun = + define declare_main_function t + +(* -------------------------------------------------------------------------- *) + +(* Because all functions are mutually recursive, their definitions must be + preceded with their prototypes. *) + +let prototype (f : decl_or_fun) : decl_or_fun = + match f with + | T.Function (_, _, declaration, _) -> + T.Decl ([], declaration) + | T.Decl _ -> + assert false + +let prototypes (fs : decl_or_fun list) : decl_or_fun list = + List.map prototype fs @ + fs + +(* -------------------------------------------------------------------------- *) + +(* The translation of a complete program. *) + +let finish_program (S.Prog (decls, main) : S.program) : T.program = + prototypes ( + define_main_function main :: + List.map define_ordinary_function decls + ) diff --git a/project/src/Finish.mli b/project/src/Finish.mli new file mode 100644 index 0000000..6785984 --- /dev/null +++ b/project/src/Finish.mli @@ -0,0 +1,5 @@ +(* This function implements a translation of the intermediate language [Top] + down to [C]. This transformation is mostly a matter of choosing appropriate + C constructs to reflect the concepts of the language [Top]. *) + +val finish_program: Top.program -> C.program diff --git a/project/src/Lambda.ml b/project/src/Lambda.ml new file mode 100644 index 0000000..fdb9ecb --- /dev/null +++ b/project/src/Lambda.ml @@ -0,0 +1,48 @@ +(* This language is the untyped lambda-calculus, extended with recursive + lambda-abstractions, nonrecursive [let] bindings, integer literals and + integer arithmetic operations, and the primitive operation [print]. *) + +(* This language is really the same language as [RawLambda], with the + following internal differences: + + 1. Instead of recursive [let] bindings, the language has recursive + lambda-abstractions. A [let rec] definition whose right-hand side is not + a lambda-abstraction is rejected during the translation of [RawLambda] + to [Lambda]. + + 2. Variables are represented by atoms (instead of strings). A term with an + unbound variable is rejected during the translation of [RawLambda] to + [Lambda]. + + 3. Terms are no longer annotated with places. *) + +(* Variables are atoms. *) + +type variable = + Atom.atom + + (* Every lambda-abstraction is marked recursive or nonrecursive. Whereas a + nonrecursive lambda-abstraction [fun x -> t] binds one variable [x], a + recursive lambda-abstraction [fix f. fun x -> t] binds two variables [f] + and [x]. The variable [f] is a self-reference. *) + +and self = + | Self of variable + | NoSelf + +and binop = RawLambda.binop = + | OpAdd + | OpSub + | OpMul + | OpDiv + +and term = + | Var of variable + | Lam of self * variable * term + | App of term * term + | Lit of int + | BinOp of term * binop * term + | Print of term + | Let of variable * term * term + +[@@deriving show { with_path = false }] diff --git a/project/src/Lexer.mll b/project/src/Lexer.mll new file mode 100644 index 0000000..ba016f0 --- /dev/null +++ b/project/src/Lexer.mll @@ -0,0 +1,97 @@ +{ + +open Lexing +open Error +open Parser +open RawLambda + +} + +(* -------------------------------------------------------------------------- *) + +(* Regular expressions. *) + +let newline = + ('\010' | '\013' | "\013\010") + +let whitespace = + [ ' ' '\t' ] + +let lowercase = + ['a'-'z' '\223'-'\246' '\248'-'\255' '_'] + +let uppercase = + ['A'-'Z' '\192'-'\214' '\216'-'\222'] + +let identchar = + ['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' '0'-'9'] + +let digit = + ['0'-'9'] + +(* -------------------------------------------------------------------------- *) + +(* The lexer. *) + +rule entry = parse +| "fun" + { FUN } +| "in" + { IN } +| "let" + { LET } +| "print" + { PRINT } +| "rec" + { REC } +| "->" + { ARROW } +| "=" + { EQ } +| "(" + { LPAREN } +| ")" + { RPAREN } +| "+" + { ADDOP OpAdd } +| "-" + { ADDOP OpSub } +| "*" + { MULOP OpMul } +| "/" + { MULOP OpDiv } +| (lowercase identchar *) as x + { IDENT x } +| digit+ as i + { try + INTLITERAL (int_of_string i) + with Failure _ -> + error (place lexbuf) "invalid integer literal." } +| "(*" + { ocamlcomment (place lexbuf) lexbuf; entry lexbuf } +| newline + { new_line lexbuf; entry lexbuf } +| whitespace+ + { entry lexbuf } +| eof + { EOF } +| _ as c + { error (place lexbuf) "unexpected character: '%c'." c } + +(* ------------------------------------------------------------------------ *) + + (* Skip OCaml-style comments. Comments can be nested. This sub-lexer is + parameterized with the place of the opening comment, so if an unterminated + comment is detected, we can show where it was opened. *) + +and ocamlcomment p = parse +| "*)" + { () } +| "(*" + { ocamlcomment (place lexbuf) lexbuf; ocamlcomment p lexbuf } +| newline + { new_line lexbuf; ocamlcomment p lexbuf } +| eof + { error p "unterminated comment." } +| _ + { ocamlcomment p lexbuf } diff --git a/project/src/Main.ml b/project/src/Main.ml new file mode 100644 index 0000000..9c8eed4 --- /dev/null +++ b/project/src/Main.ml @@ -0,0 +1,101 @@ +(* -------------------------------------------------------------------------- *) + +(* Parse the command line. *) + +let debug = + ref false + +let filenames = + ref [] + +let record filename = + filenames := filename :: !filenames + +let options = + Arg.align [ + "--debug", Arg.Set debug, " Enable debugging output"; + ] + +let usage = + Printf.sprintf "Usage: %s " Sys.argv.(0) + +let () = + Arg.parse options record usage + +let debug = + !debug + +let filenames = + List.rev !filenames + +(* -------------------------------------------------------------------------- *) + +(* Printing a syntax tree in an intermediate language (for debugging). *) + +let print_delimiter () = + Printf.eprintf "----------------------------------------"; + Printf.eprintf "----------------------------------------\n" + +let dump (phase : string) (show : 'term -> string) (t : 'term) = + if debug then begin + print_delimiter(); + Printf.eprintf "%s:\n\n%s\n\n%!" phase (show t) + end; + t + +(* -------------------------------------------------------------------------- *) + +(* Reading and parsing a file. *) + +let read filename : RawLambda.term = + try + let contents = Utils.file_get_contents filename in + let lexbuf = Lexing.from_string contents in + Error.set_filename lexbuf filename; + try + Parser.entry Lexer.entry lexbuf + with + | Parser.Error -> + Error.error (Error.place lexbuf) "Syntax error." + with + | Sys_error msg -> + prerr_endline msg; + exit 1 + +(* -------------------------------------------------------------------------- *) + +(* Printing the final C program on the standard output channel. *) + +let output (p : C.program) : unit = + Printf.printf "#include\n"; + Printf.printf "#include\n"; + Printf.printf "#include \"prologue.h\"\n\n"; + let print_program = PrintCommon.print_program PrintC.p_decl_or_function in + let buf = Buffer.create 1024 in + PrintCommon.printf_of_pprint_pretty print_program buf p; + print_endline (Buffer.contents buf) + +(* -------------------------------------------------------------------------- *) + +(* The complete processing pipeline. Beautiful, isn't it? *) + +let process filename = + filename + |> read + |> dump "RawLambda" RawLambda.show_term + |> Cook.cook_term + |> dump "Lambda" Lambda.show_term + |> CPS.cps_term + |> dump "Tail" Tail.show_term + |> Defun.defun_term + |> dump "Top" Top.show_program + |> Finish.finish_program + |> dump "C" C.show_program + |> output + +(* -------------------------------------------------------------------------- *) + +(* The main program. *) + +let () = + List.iter process filenames diff --git a/project/src/Makefile b/project/src/Makefile new file mode 100644 index 0000000..73f39e1 --- /dev/null +++ b/project/src/Makefile @@ -0,0 +1,28 @@ +SHELL := bash +TARGET := Main.native +JOUJOU := joujou +DIRS := kremlin,alphalib +OCAMLBUILD :=\ + ocamlbuild \ + -classic-display \ + -j 4 \ + -use-ocamlfind \ + -use-menhir \ + -menhir "menhir -lg 1 -la 1 --explain" \ + -Is $(DIRS) \ + +.PHONY: all test clean + +all: + @ $(OCAMLBUILD) -quiet $(TARGET) + @ ln -sf $(TARGET) $(JOUJOU) + +test: all + @ make -C test test + +clean: + rm -f *~ + rm -f tests/*.c tests/*.out + $(OCAMLBUILD) -clean + rm -f $(TARGET) $(JOUJOU) + $(MAKE) -C test clean diff --git a/project/src/Parser.mly b/project/src/Parser.mly new file mode 100644 index 0000000..4f032b2 --- /dev/null +++ b/project/src/Parser.mly @@ -0,0 +1,155 @@ +%token IDENT +%token INTLITERAL +%token FUN IN LET PRINT REC +%token ARROW EQ LPAREN RPAREN +%token MULOP ADDOP +%token EOF + +%start entry + +%{ + +open RawLambda + +%} + +%% + +(* -------------------------------------------------------------------------- *) + +(* A toplevel phrase is just a term. *) + +entry: + t = any_term EOF + { t } + +(* -------------------------------------------------------------------------- *) + +(* The syntax of terms is stratified as follows: + + atomic_term -- unambiguously delimited terms + application_term -- n-ary applications of atomic terms + multiplicative_term -- built using multiplication & division + additive_term -- built using addition & subtraction + any_term -- everything + + A [match/with/end] construct is terminated with an [end] keyword, as in Coq, + so it is an atomic term. *) + +atomic_term_: +| LPAREN t = any_term RPAREN + { t.value } +| x = IDENT + { Var x } +| i = INTLITERAL + { Lit i } + +application_term_: +| t = atomic_term_ + { t } +| t1 = placed(application_term_) t2 = placed(atomic_term_) + { App (t1, t2) } +| PRINT t2 = placed(atomic_term_) + { Print t2 } + +%inline multiplicative_term_: + t = left_associative_level(application_term_, MULOP, mkbinop) + { t } + +%inline additive_term_: + t = left_associative_level(multiplicative_term_, ADDOP, mkbinop) + { t } + +any_term_: +| t = additive_term_ + { t } +| FUN x = IDENT ARROW t = any_term + { Lam (x, t) } +| LET mode = recursive x = IDENT EQ t1 = any_term IN t2 = any_term + { Let (mode, x, t1, t2) } + +%inline any_term: + t = placed(any_term_) + { t } + +(* -------------------------------------------------------------------------- *) + +(* An infix-left-associative-operator level in a hierarchy of arithmetic + expressions. *) + +(* [basis] is the next lower level in the hierarchy. + [op] is the category of binary operators. + [action] is a ternary sequencing construct. *) + +left_associative_level(basis, op, action): +| t = basis +| t = action( + left_associative_level(basis, op, action), + op, + basis + ) + { t } + +(* -------------------------------------------------------------------------- *) + +(* A ternary sequence whose semantic action builds a [BinOp] node. *) + +%inline mkbinop(term1, op, term2): + t1 = placed(term1) op = op t2 = placed(term2) + { BinOp (t1, op, t2) } + +(* -------------------------------------------------------------------------- *) + +(* A [let] construct carries an optional [rec] annotation. *) + +recursive: +| REC { Recursive } +| { NonRecursive } + +(* -------------------------------------------------------------------------- *) + +(* A term is annotated with its start and end positions, for use in error + messages. *) + +%inline placed(X): + x = X + { { place = ($startpos, $endpos); value = x } } + +(* -------------------------------------------------------------------------- *) + +(* In a right-flexible list, the last delimiter is optional, i.e., [delim] can + be viewed as a terminator or a separator, as desired. *) + +(* There are several ways of expressing this. One could say it is either a + separated list or a terminated list; this works if one uses right recursive + lists. Or, one could say that it is a separated list followed with an + optional delimiter; this works if one uses a left-recursive list. The + following formulation is direct and seems most natural. It should lead to + the smallest possible automaton. *) + +right_flexible_list(delim, X): +| (* nothing *) + { [] } +| x = X + { [x] } +| x = X delim xs = right_flexible_list(delim, X) + { x :: xs } + +(* In a left-flexible list, the first delimiter is optional, i.e., [delim] can + be viewed as an opening or as a separator, as desired. *) + +(* Again, there are several ways of expressing this, and again, I suppose the + following formulation is simplest. It is the mirror image of the above + definition, so it is naturally left-recursive, this time. *) + +reverse_left_flexible_list(delim, X): +| (* nothing *) + { [] } +| x = X + { [x] } +| xs = reverse_left_flexible_list(delim, X) delim x = X + { x :: xs } + +%inline left_flexible_list(delim, X): + xs = reverse_left_flexible_list(delim, X) + { List.rev xs } diff --git a/project/src/RawLambda.ml b/project/src/RawLambda.ml new file mode 100644 index 0000000..51f33e1 --- /dev/null +++ b/project/src/RawLambda.ml @@ -0,0 +1,55 @@ +(* Variables are strings. *) + +type variable = + string + +(* Every [let] binding is marked recursive or nonrecursive. *) + +and recursive = + | Recursive + | NonRecursive + +(* The four standard integer arithmetic operations are supported. *) + +and binop = + | OpAdd + | OpSub + | OpMul + | OpDiv + +(* This language is the untyped lambda-calculus, extended with possibly + recursive [let] bindings, integer literals (that is, constants), integer + arithmetic operations, and the primitive operation [print], which prints an + integer value and returns it. *) + +and term_ = + | Var of variable + | Lam of variable * term + | App of term * term + | Lit of int + | BinOp of term * binop * term + | Print of term + | Let of recursive * variable * term * term + +(* Every abstract syntax tree node of type [term] is annotated with a place, + that is, a position in the source code. This allows us to produce a good + error message when a problem is detected. *) + +and term = + term_ placed + +(* A value of type ['a placed] can be thought of as a value of type ['a] + decorated with a place. *) + +and 'a placed = { + place: Error.place; + value: 'a + } + +(* The following annotation requests the automatic generation of a [show_] + function for each of the types defined above. For instance, the function + [show_term], of type [term -> string], converts a term to a string. These + functions can be useful during debugging. Running with [--debug] causes + every intermediate abstract syntax tree to be displayed in this way. *) + +[@@deriving show { with_path = false }] diff --git a/project/src/Tail.ml b/project/src/Tail.ml new file mode 100644 index 0000000..44b8b6d --- /dev/null +++ b/project/src/Tail.ml @@ -0,0 +1,132 @@ +(* This intermediate language describes the result of the CPS transformation. + It is a lambda-calculus where the ordering of computations is explicit and + where every function call is a tail call. + + The following syntactic categories are distinguished: + + 1. "Values" include variables, integer literals, and applications of the + primitive integer operations to values. Instead of "values", they could + also be referred to as "pure expressions". They are expressions whose + evaluation terminates and has no side effect, not even memory + allocation. + + 2. "Blocks" include lambda-abstractions. Even though lambda-abstractions + are traditionally considered values, here, they are viewed as + expressions whose evaluation has a side effect, namely, the allocation + of a memory block. + + 3. "Terms" are expressions with side effects. Terms always appear in tail + position: an examination of the syntax of terms shows that a term can be + viewed as a sequence of [LetVal], [LetBlo] and [Print] instructions, + terminated with either [Exit] or [TailCall]. This implies, in + particular, that every call is a tail call. + + In contrast with the surface language, where every lambda-abstraction has + arity 1, in this calculus, lambda-abstractions of arbitrary arity are + supported. A lambda-abstraction [Lam] carries a list of formal arguments + and a function call [TailCall] carries a list of actual arguments. Partial + applications or over-applications are not supported: it is the programmer's + responsibility to ensure that every function call provides exactly as many + arguments as expected by the called function. *) + +type variable = + Atom.atom + +and self = Lambda.self = + | Self of variable + | NoSelf + +and binop = Lambda.binop = + | OpAdd + | OpSub + | OpMul + | OpDiv + +and value = + | VVar of variable + | VLit of int + | VBinOp of value * binop * value + +and block = + | Lam of self * variable list * term + +(* Terms include the following constructs: + + - The primitive operation [Exit] stops the program. + + - The tail call [TailCall (v, vs)] transfers control to the function [v] + with actual arguments [vs]. (The value [v] should be a function and its + arity should be the length of [vs].) + + - The term [Print (v, t)] prints the value [v], then executes the term [t]. + (The value [v] should be a primitive integer value.) + + - The term [LetVal (x, v, t)] binds the variable [x] to the value [v], then + executes the term [t]. + + - The term [LetBlo (x, b, t)] allocates the memory block [b] and binds the + variable [x] to its address, then executes the term [t]. *) + +and term = + | Exit + | TailCall of value * value list + | Print of value * term + | LetVal of variable * value * term + | LetBlo of variable * block * term + +[@@deriving show { with_path = false }] + +(* -------------------------------------------------------------------------- *) + +(* Constructor functions. *) + +let vvar x = + VVar x + +let vvars xs = + List.map vvar xs + +(* -------------------------------------------------------------------------- *) + +(* Computing the free variables of a value, block, or term. *) + +open Atom.Set + +let rec fv_value (v : value) = + match v with + | VVar x -> + singleton x + | VLit _ -> + empty + | VBinOp (v1, _, v2) -> + union (fv_value v1) (fv_value v2) + +and fv_values (vs : value list) = + union_many fv_value vs + +and fv_lambda (xs : variable list) (t : term) = + diff (fv_term t) (of_list xs) + +and fv_block (b : block) = + match b with + | Lam (NoSelf, xs, t) -> + fv_lambda xs t + | Lam (Self f, xs, t) -> + remove f (fv_lambda xs t) + +and fv_term (t : term) = + match t with + | Exit -> + empty + | TailCall (v, vs) -> + fv_values (v :: vs) + | Print (v1, t2) -> + union (fv_value v1) (fv_term t2) + | LetVal (x, v1, t2) -> + union + (fv_value v1) + (remove x (fv_term t2)) + | LetBlo (x, b1, t2) -> + union + (fv_block b1) + (remove x (fv_term t2)) diff --git a/project/src/Top.ml b/project/src/Top.ml new file mode 100644 index 0000000..edbb0fb --- /dev/null +++ b/project/src/Top.ml @@ -0,0 +1,94 @@ +(* This intermediate language describes the result of defunctionalization. + It retains the key features of the previous calculus, [Tail], in that + the ordering of computations is explicit and every function call is a + tail call. Furthermore, lambda-abstractions disappear. A memory block + [Con] now contains an integer tag followed with a number of fields, + which hold values. A [switch] construct appears, which allows testing + the tag of a memory block. A number of (closed, mutually recursive) + functions can be defined at the top level. *) + +type tag = + int + +and variable = + Atom.atom + +and binop = Tail.binop = + | OpAdd + | OpSub + | OpMul + | OpDiv + +and value = Tail.value = + | VVar of variable + | VLit of int + | VBinOp of value * binop * value + +(* A block contains an integer tag, followed with a number of fields. *) + +and block = + | Con of tag * value list + +(* The construct [Swi (v, branches)] reads the integer tag stored in the + memory block at address [v] and performs a case analysis on this tag, + transferring control to the appropriate branch. (The value [v] should be a + pointer to a memory block.) *) + +and term = + | Exit + | TailCall of variable * value list + | Print of value * term + | LetVal of variable * value * term + | LetBlo of variable * block * term + | Swi of value * branch list + +(* A branch [tag xs -> t] is labeled with an integer tag [tag], and is + executed if the memory block carries this tag. The variables [xs] are + then bounds to the fields of the memory block. (The length of the list + [xs] should be the number of fields of the memory block.) *) + +and branch = + | Branch of tag * variable list * term + +(* A toplevel function declaration mentions the function's name, formal + parameters, and body. *) + +and function_declaration = + | Fun of variable * variable list * term + +(* A complete program consits of a set of toplevel function declarations + and a term (the "main program"). The functions are considered mutually + recursive: every function may refer to every function. *) + +and program = + | Prog of function_declaration list * term + +[@@deriving show { with_path = false }] + +(* -------------------------------------------------------------------------- *) + +(* Constructor functions. *) + +let vvar = + Tail.vvar + +let vvars = + Tail.vvars + +(* [let x_1 = v_1 in ... let x_n = v_n in t] *) + +let rec sequential_let (xs : variable list) (vs : value list) (t : term) = + match xs, vs with + | [], [] -> + t + | x :: xs, v :: vs -> + LetVal (x, v, sequential_let xs vs t) + | _ -> + assert false + +(* [let x_1 = v_1 and ... x_n = v_n in t] *) + +let parallel_let (xs : variable list) (vs : value list) (t : term) = + assert (List.length xs = List.length vs); + assert (Atom.Set.disjoint (Atom.Set.of_list xs) (Tail.fv_values vs)); + sequential_let xs vs t diff --git a/project/src/_tags b/project/src/_tags new file mode 100644 index 0000000..bf1bddf --- /dev/null +++ b/project/src/_tags @@ -0,0 +1,8 @@ +true: \ + debug, \ + strict_sequence, \ + warn(A-3-4-30-44-42-45-50), \ + package(unix), \ + package(process), \ + package(pprint), \ + package(ppx_deriving.std) diff --git a/project/src/alphalib/Atom.ml b/project/src/alphalib/Atom.ml new file mode 100644 index 0000000..6502410 --- /dev/null +++ b/project/src/alphalib/Atom.ml @@ -0,0 +1,215 @@ +(* -------------------------------------------------------------------------- *) + +(* We impose maximal sharing on strings so as to reduce the total amount of + space that they occupy. This is done using a weak hash set. *) + +module StringStorage = + Weak.Make(struct + type t = string + let equal (s1 : string) (s2 : string) = (s1 = s2) + let hash = Hashtbl.hash + end) + +let share : string -> string = + StringStorage.merge (StringStorage.create 128) + +(* -------------------------------------------------------------------------- *) + +(* Removing any trailing digits in a string. *) + +let is_digit c = + Char.code '0' <= Char.code c && Char.code c <= Char.code '9' + +let remove_trailing_digits (s : string) : string = + let n = ref (String.length s) in + while !n > 0 && is_digit s.[!n-1] do n := !n-1 done; + (* We assume that there is at least one non-digit character in the string. *) + assert (!n > 0); + String.sub s 0 !n + +(* -------------------------------------------------------------------------- *) + +(* An atom is implemented as a pair of an integer identity and a string that + serves as a printing hint. *) + +(* We maintain the invariant that a hint is nonempty and does not end in a + digit. This allows us to later produce unique identifiers, without risk of + collisions, by concatenating a hint and a unique number. *) + +(* To preserve space, hints are maximally shared. This is not essential for + correctness, though. *) + +type atom = { identity: int; hint: string } + +and t = atom + [@@deriving show { with_path = false }] + +let identity a = + a.identity + +let hint a = + a.hint + +(* -------------------------------------------------------------------------- *) + +(* A global integer counter holds the next available identity. *) + +let counter = + ref 0 + +let allocate () = + let number = !counter in + counter := number + 1; + assert (number >= 0); + number + +(* [fresh hint] produces a fresh atom. *) + +(* The argument [hint] must not be a string of digits. *) + +let fresh hint = + let identity = allocate() + and hint = share (remove_trailing_digits hint) in + { identity; hint } + +(* [copy a] returns a fresh atom modeled after the atom [a]. *) + +let copy a = + fresh a.hint + +(* -------------------------------------------------------------------------- *) + +(* Comparison of atoms. *) + +let equal a b = + a.identity = b.identity + +let compare a b = + (* Identities are always positive numbers (see [allocate] above) + so I believe overflow is impossible here. *) + a.identity - b.identity + +let hash a = + Hashtbl.hash a.identity + +(* -------------------------------------------------------------------------- *) + +(* A scratch buffer for printing. *) + +let scratch = + Buffer.create 1024 + +(* [print_separated_sequence] prints a sequence of elements into the [scratch] + buffer. The sequence is given by the higher-order iterator [iter], applied + to the collection [xs]. The separator is the string [sep]. Each element is + transformed to a string by the function [show]. *) + +let print_separated_sequence show sep iter xs : unit = + let first = ref true in + iter (fun x -> + if !first then begin + Buffer.add_string scratch (show x); + first := false + end + else begin + Buffer.add_string scratch sep; + Buffer.add_string scratch (show x) + end + ) xs + +(* -------------------------------------------------------------------------- *) + +(* Sets and maps. *) + +module Order = struct + type t = atom + let compare = compare +end + +module Set = struct + + include Set.Make(Order) + + (* A disjointness test. *) + + let disjoint xs ys = + is_empty (inter xs ys) + + (* Iterated union. *) + + let union_many (f : 'a -> t) (xs : 'a list) : t = + List.fold_left (fun accu x -> + union accu (f x) + ) empty xs + + (* Disjoint union. *) + + exception NonDisjointUnion of atom + + let disjoint_union xs ys = + match choose (inter xs ys) with + | exception Not_found -> + (* The intersection of [xs] and [ys] is empty. Return their union. *) + union xs ys + | x -> + (* The intersection contains [x]. Raise an exception. *) + raise (NonDisjointUnion x) + + let handle_NonDisjointUnion f x = + try + f x; true + with NonDisjointUnion a -> + Printf.eprintf "NonDisjointUnion: %s\n%!" (show a); + false + + (* Sets of atoms form a monoid under union. *) + + class ['z] union_monoid = object + method zero: 'z = empty + method plus: 'z -> 'z -> 'z = union + end + + (* Sets of atoms form a monoid under disjoint union. *) + + class ['z] disjoint_union_monoid = object + method zero: 'z = empty + method plus: 'z -> 'z -> 'z = disjoint_union + end + + (* These printing functions should be used for debugging purposes only. *) + + let print_to_scratch xs = + Buffer.clear scratch; + Buffer.add_string scratch "{"; + print_separated_sequence show ", " iter xs; + Buffer.add_string scratch "}" + + let show xs = + print_to_scratch xs; + let result = Buffer.contents scratch in + Buffer.reset scratch; + result + + let print oc xs = + print_to_scratch xs; + Buffer.output_buffer oc scratch; + Buffer.reset scratch + +end + +module Map = struct + + include Map.Make(Order) + + (* This is O(nlog n), whereas in principle O(n) is possible. + The abstraction barrier in OCaml's [Set] module hinders us. *) + let domain m = + fold (fun a _ accu -> Set.add a accu) m Set.empty + + let codomain f m = + fold (fun _ v accu -> Set.union (f v) accu) m Set.empty + +end + +type renaming = + atom Map.t diff --git a/project/src/alphalib/Atom.mli b/project/src/alphalib/Atom.mli new file mode 100644 index 0000000..6ec6206 --- /dev/null +++ b/project/src/alphalib/Atom.mli @@ -0,0 +1,71 @@ +(* TEMPORARY document *) + +(* An atom is a pair of a unique integer identity and a (not necessarily + unique) string. *) + +type atom + +and t = atom + [@@deriving show] + +val identity: atom -> int +val hint: atom -> string + +(* Producing fresh atoms. *) + +val fresh: string -> atom +val copy: atom -> atom + +(* Comparison of atoms. *) + +val equal: atom -> atom -> bool +val compare: atom -> atom -> int +val hash: atom -> int + +(* Sets. *) + +module Set : sig + include Set.S with type elt = atom + + val disjoint: t -> t -> bool + + val union_many: ('a -> t) -> 'a list -> t + + (* Disjoint union. *) + + exception NonDisjointUnion of atom + val disjoint_union: t -> t -> t + + val handle_NonDisjointUnion: ('a -> unit) -> 'a -> bool + + (* Sets of atoms form monoids under union and disjoint union. *) + + class ['z] union_monoid : object + constraint 'z = t + method zero: 'z + method plus: 'z -> 'z -> 'z + end + + class ['z] disjoint_union_monoid : object + constraint 'z = t + method zero: 'z + method plus: 'z -> 'z -> 'z + end + + val show: t -> string + val print: out_channel -> t -> unit +end + +(* Maps. *) + +module Map : sig + + include Map.S with type key = atom + + val domain: 'a t -> Set.t + val codomain: ('a -> Set.t) -> 'a t -> Set.t + +end + +type renaming = + atom Map.t diff --git a/project/src/kremlin/C.ml b/project/src/kremlin/C.ml new file mode 100644 index 0000000..db29569 --- /dev/null +++ b/project/src/kremlin/C.ml @@ -0,0 +1,126 @@ + + +module K = Constant +open Common + +(* This pretty-printer based on: http:/ /en.cppreference.com/w/c/language/declarations + * Many cases are omitted from this bare-bones C grammar; hopefully, to be extended. *) +type type_spec = + | Int of Constant.width + | Void + | Named of ident + | Struct of ident option * declaration list option + (** Note: the option allows for zero-sized structs (GCC's C, C++) but as + * of 2017/05/14 we never generate these. *) + | Union of ident option * declaration list + | Enum of ident option * ident list + (** not encoding all the invariants here *) + [@@deriving show { with_path = false }] + +and storage_spec = + | Typedef + | Extern + | Static + +and declarator_and_init = + declarator * init option + +and declarator_and_inits = + declarator_and_init list + +and declarator = + | Ident of ident + | Pointer of declarator + | Array of declarator * expr + | Function of calling_convention option * declarator * params + +and expr = + | Op1 of K.op * expr + | Op2 of K.op * expr * expr + | Index of expr * expr + | Deref of expr + | Address of expr + | Member of expr * expr + | MemberP of expr * expr + | Assign of expr * expr + (** not covering *=, +=, etc. *) + | Call of expr * expr list + | Name of ident + | Cast of type_name * expr + | Literal of string + | Constant of K.t + | Bool of bool + | Sizeof of expr + | SizeofType of type_spec + | CompoundLiteral of type_name * init list + | MemberAccess of expr * ident + | MemberAccessPointer of expr * ident + | InlineComment of string * expr * string + | Type of type_name + (** note: these two not in the C grammar *) + +(** this is a WILD approximation of the notion of "type name" in C _and_ a hack + * because there's the invariant that the ident found at the bottom of the + * [declarator] is irrelevant... *) +and type_name = + type_spec * declarator + +and params = + (* No support for old syntax, e.g. K&R, or [void f(void)]. *) + param list + +and param = + (** Also approximate. *) + type_spec * declarator + +and declaration = + type_spec * storage_spec option * declarator_and_inits + +and ident = + string + +and init = + | InitExpr of expr + | Designated of designator * init + | Initializer of init list + +and designator = + | Dot of ident + | Bracket of int + +(** Note: according to http:/ /en.cppreference.com/w/c/language/statements, + * declarations can only be part of a compound statement... we do not enforce + * this invariant via the type [stmt], but rather check it at runtime (see + * [mk_compound_if]), as the risk of messing things up, naturally. *) +type stmt = + | Compound of stmt list + | DeclStmt of declaration + | Expr of expr + | If of expr * stmt + | IfElse of expr * stmt * stmt + | While of expr * stmt + | For of declaration_or_expr * expr * expr * stmt + | Return of expr option + | Switch of expr * (expr * stmt) list * stmt + (** the last component is the default statement *) + | Break + | Comment of string + (** note: this is not in the C grammar *) + +and program = + declaration_or_function list + +and comment = + string + +and declaration_or_function = + | Decl of comment list * declaration + | Function of comment list * bool * declaration * stmt + (** [stmt] _must_ be a compound statement; boolean is inline *) + +and declaration_or_expr = [ + | `Decl of declaration + | `Expr of expr + | `Skip +] +[@@deriving show { with_path = false }] diff --git a/project/src/kremlin/Common.ml b/project/src/kremlin/Common.ml new file mode 100644 index 0000000..77638d7 --- /dev/null +++ b/project/src/kremlin/Common.ml @@ -0,0 +1,19 @@ +type calling_convention = + | StdCall + | CDecl + | FastCall + [@@deriving show { with_path = false }] + +type lifetime = + | Eternal + | Stack + [@@deriving show { with_path = false }] + +type flag = + | Private + | NoExtract + | CInline + | Substitute + | GcType + | Comment of string + [@@deriving show { with_path = false }] diff --git a/project/src/kremlin/Constant.ml b/project/src/kremlin/Constant.ml new file mode 100644 index 0000000..5533d38 --- /dev/null +++ b/project/src/kremlin/Constant.ml @@ -0,0 +1,66 @@ +(** Machine integers. Don't repeat the same thing everywhere. *) + +type t = width * string + [@@deriving show] + +and width = + | UInt8 | UInt16 | UInt32 | UInt64 | Int8 | Int16 | Int32 | Int64 + | Bool + | CInt (** Checked signed integers. *) + +let bytes_of_width (w: width) = + match w with + | UInt8 -> 1 + | UInt16 -> 2 + | UInt32 -> 4 + | UInt64 -> 8 + | Int8 -> 1 + | Int16 -> 2 + | Int32 -> 4 + | Int64 -> 8 + | CInt -> 4 + | _ -> invalid_arg "bytes_of_width" + +type op = + (* Arithmetic operations *) + | Add | AddW | Sub | SubW | Div | DivW | Mult | MultW | Mod + (* Bitwise operations *) + | BOr | BAnd | BXor | BShiftL | BShiftR | BNot + (* Arithmetic comparisons / boolean comparisons *) + | Eq | Neq | Lt | Lte | Gt | Gte + (* Boolean operations *) + | And | Or | Xor | Not + (* Effectful operations *) + | Assign | PreIncr | PreDecr | PostIncr | PostDecr + (* Misc *) + | Comma + [@@deriving show { with_path = false }] + +let unsigned_of_signed = function + | Int8 -> UInt8 + | Int16 -> UInt16 + | Int32 -> UInt32 + | Int64 -> UInt64 + | CInt | UInt8 | UInt16 | UInt32 | UInt64 | Bool -> raise (Invalid_argument "unsigned_of_signed") + +let is_signed = function + | Int8 | Int16 | Int32 | Int64 | CInt -> true + | UInt8 | UInt16 | UInt32 | UInt64 -> false + | Bool -> raise (Invalid_argument "is_signed") + +let is_unsigned w = not (is_signed w) + +let without_wrap = function + | AddW -> Add + | SubW -> Sub + | MultW -> Mult + | DivW -> Div + | _ -> raise (Invalid_argument "without_wrap") + +let uint8_of_int i = + assert (i < (1 lsl 8) && i >= 0); + UInt8, string_of_int i + +let uint32_of_int i = + assert (i < (1 lsl 32) && i >= 0); + UInt32, string_of_int i diff --git a/project/src/kremlin/Options.ml b/project/src/kremlin/Options.ml new file mode 100644 index 0000000..fd6bfbf --- /dev/null +++ b/project/src/kremlin/Options.ml @@ -0,0 +1,7 @@ +(* By default, the C printer inserts a minimal amount of parentheses. + However, this can trigger GCC and Clang's -Wparentheses warning, + so there is an option to produce more parentheses than strictly + necessary. *) + +let parentheses = + ref false diff --git a/project/src/kremlin/PrintC.ml b/project/src/kremlin/PrintC.ml new file mode 100644 index 0000000..34583ee --- /dev/null +++ b/project/src/kremlin/PrintC.ml @@ -0,0 +1,354 @@ +(** Pretty-printer that conforms with C syntax. Also defines the grammar of + * concrete C syntax, as opposed to our idealistic, well-formed C*. *) + +open PPrint +open PrintCommon +open C + +(* Pretty-printing of actual C syntax *****************************************) + +let p_storage_spec = function + | Typedef -> string "typedef" + | Extern -> string "extern" + | Static -> string "static" + +let rec p_type_spec = function + | Int w -> print_width w ^^ string "_t" + | Void -> string "void" + | Named s -> string s + | Union (name, decls) -> + group (string "union" ^/^ + (match name with Some name -> string name ^^ break1 | None -> empty)) ^^ + braces_with_nesting (separate_map hardline (fun p -> group (p_declaration p ^^ semi)) decls) + | Struct (name, decls) -> + group (string "struct" ^/^ + (match name with Some name -> string name | None -> empty)) ^^ + (match decls with + | Some decls -> + break1 ^^ braces_with_nesting (separate_map hardline (fun p -> group (p_declaration p ^^ semi)) decls) + | None -> + empty) + | Enum (name, tags) -> + group (string "enum" ^/^ + (match name with Some name -> string name ^^ break1 | None -> empty)) ^^ + braces_with_nesting (separate_map (comma ^^ break1) string tags) + + +and p_type_declarator d = + let rec p_noptr = function + | Ident n -> + string n + | Array (d, s) -> + p_noptr d ^^ lbracket ^^ p_expr s ^^ rbracket + | Function (cc, d, params) -> + let cc = match cc with Some cc -> print_cc cc ^^ break1 | None -> empty in + group (cc ^^ p_noptr d ^^ parens_with_nesting (separate_map (comma ^^ break 1) (fun (spec, decl) -> + group (p_type_spec spec ^/^ p_any decl) + ) params)) + | d -> + lparen ^^ p_any d ^^ rparen + and p_any = function + | Pointer d -> + star ^^ p_any d + | d -> + p_noptr d + in + p_any d + +and p_type_name (spec, decl) = + match decl with + | Ident "" -> + p_type_spec spec + | _ -> + p_type_spec spec ^^ space ^^ p_type_declarator decl + +(* http:/ /en.cppreference.com/w/c/language/operator_precedence *) +and prec_of_op2 op = + let open Constant in + match op with + | AddW | SubW | MultW | DivW -> failwith "[prec_of_op]: should've been desugared" + | Add -> 4, 4, 4 + | Sub -> 4, 4, 3 + | Div -> 3, 3, 2 + | Mult -> 3, 3, 3 + | Mod -> 3, 3, 2 + | BOr -> 10, 10, 10 + | BAnd -> + 8, 8, 8 + | BXor | Xor -> 9, 9, 9 + | BShiftL | BShiftR -> + 5, 5, 4 + | Eq | Neq -> 7, 7, 7 + | Lt | Lte | Gt | Gte -> 6, 6, 5 + | And -> 11, 11, 11 + | Or -> 12, 12, 12 + | Assign -> 14, 13, 14 + | Comma -> 15, 15, 14 + | PreIncr | PostIncr | PreDecr | PostDecr | Not | BNot -> raise (Invalid_argument "prec_of_op2") + +and prec_of_op1 op = + let open Constant in + match op with + | PreDecr | PreIncr | Not | BNot -> 2 + | PostDecr | PostIncr -> 1 + | _ -> raise (Invalid_argument "prec_of_op1") + +and is_prefix op = + let open Constant in + match op with + | PreDecr | PreIncr | Not | BNot -> true + | PostDecr | PostIncr -> false + | _ -> raise (Invalid_argument "is_prefix") + +(* The precedence level [curr] is the maximum precedence the current node should + * have. If it doesn't, then it should be parenthesized. Lower numbers bind + * tighter. *) +and paren_if curr mine doc = + if curr < mine then + group (lparen ^^ doc ^^ rparen) + else + doc + +(* [e] is an operand of [op]; is this likely to trigger GCC's -Wparentheses? If + * so, downgrade the current precedence to 0 to force parenthses. *) +and defeat_Wparentheses op e prec = + let open Constant in + if not !Options.parentheses then + prec + else match op, e with + | (BShiftL | BShiftR | BXor | BOr | BAnd), Op2 ((Add | Sub | BOr | BXor), _, _) -> + 0 + | _ -> + prec + +and p_expr' curr = function + | Op1 (op, e1) -> + let mine = prec_of_op1 op in + let e1 = p_expr' mine e1 in + paren_if curr mine (if is_prefix op then print_op op ^^ e1 else e1 ^^ print_op op) + | Op2 (op, e1, e2) -> + let mine, left, right = prec_of_op2 op in + let left = defeat_Wparentheses op e1 left in + let right = defeat_Wparentheses op e2 right in + let e1 = p_expr' left e1 in + let e2 = p_expr' right e2 in + paren_if curr mine (e1 ^/^ print_op op ^^ jump e2) + | Index (e1, e2) -> + let mine, left, right = 1, 1, 15 in + let e1 = p_expr' left e1 in + let e2 = p_expr' right e2 in + paren_if curr mine (e1 ^^ lbracket ^^ e2 ^^ rbracket) + | Assign (e1, e2) -> + let mine, left, right = 14, 13, 14 in + let e1 = p_expr' left e1 in + let e2 = p_expr' right e2 in + paren_if curr mine (group (e1 ^/^ equals) ^^ jump e2) + | Call (e, es) -> + let mine, left, arg = 1, 1, 14 in + let e = p_expr' left e in + let es = nest 2 (separate_map (comma ^^ break 1) (fun e -> group (p_expr' arg e)) es) in + paren_if curr mine (e ^^ lparen ^^ es ^^ rparen) + | Literal s -> + dquote ^^ string s ^^ dquote + | Constant (w, s) -> + string s ^^ if K.is_unsigned w then string "U" else empty + | Name s -> + string s + | Cast (t, e) -> + let mine, right = 2, 2 in + let e = group (p_expr' right e) in + let t = p_type_name t in + paren_if curr mine (lparen ^^ t ^^ rparen ^^ e) + | Type t -> + p_type_name t + | Sizeof e -> + let mine, right = 2, 2 in + let e = p_expr' right e in + paren_if curr mine (string "sizeof" ^^ space ^^ e) + | SizeofType t -> + string "sizeof" ^^ group (lparen ^^ p_type_spec t ^^ rparen) + | Address e -> + let mine, right = 2, 2 in + let e = p_expr' right e in + paren_if curr mine (ampersand ^^ e) + | Deref e -> + let mine, right = 2, 2 in + let e = p_expr' right e in + paren_if curr mine (star ^^ e) + | Member _ | MemberP _ -> + failwith "[p_expr']: not implemented" + | Bool b -> + string (string_of_bool b) + | CompoundLiteral (t, init) -> + (* NOTE: always parenthesize compound literal no matter what, because GCC + * parses an application of a function to a compound literal as an n-ary + * application. *) + parens_with_nesting ( + lparen ^^ p_type_name t ^^ rparen ^^ + braces_with_nesting (separate_map (comma ^^ break1) p_init init) + ) + | MemberAccess (expr, member) -> + p_expr' 1 expr ^^ dot ^^ string member + | MemberAccessPointer (expr, member) -> + p_expr' 1 expr ^^ string "->" ^^ string member + | InlineComment (s, e, s') -> + surround 2 1 (p_comment s) (p_expr' curr e) (p_comment s') + +and p_comment s = + (* TODO: escape *) + string "/* " ^^ nest 2 (flow space (words s)) ^^ string " */" + + +and p_expr e = p_expr' 15 e + +and p_init (i: init) = + match i with + | Designated (designator, i) -> + group (p_designator designator ^^ space ^^ equals ^^ space ^^ p_init i) + | InitExpr e -> + p_expr' 14 e + | Initializer inits -> + let inits = + if List.length inits > 4 then + flow (comma ^^ break1) (List.map p_init inits) + else + separate_map (comma ^^ break1) p_init inits + in + braces_with_nesting inits + +and p_designator = function + | Dot ident -> + dot ^^ string ident + | Bracket i -> + lbracket ^^ int i ^^ rbracket + +and p_decl_and_init (decl, init) = + group (p_type_declarator decl ^^ match init with + | Some init -> + space ^^ equals ^^ jump (p_init init) + | None -> + empty) + +and p_declaration (spec, stor, decl_and_inits) = + let stor = match stor with Some stor -> p_storage_spec stor ^^ space | None -> empty in + stor ^^ group (p_type_spec spec) ^/^ + separate_map (comma ^^ break 1) p_decl_and_init decl_and_inits + +(* This is abusing the definition of a compound statement to ensure it is printed with braces. *) +let nest_if f stmt = + match stmt with + | Compound _ -> + hardline ^^ f stmt + | _ -> + nest 2 (hardline ^^ f stmt) + +(* A note on the classic dangling else problem. Remember that this is how things + * are parsed (note the indentation): + * + * if (foo) + * if (bar) + * ... + * else + * ... + * + * And remember that this needs braces: + * + * if (foo) { + * if (bar) + * ... + * } else + * ... + * + * [protect_solo_if] adds braces to the latter case. However, GCC, unless + * -Wnoparentheses is given, will produce a warning for the former case. + * [protect_ite_if_needed] adds braces to the former case, when the user has + * requested the extra, unnecessary parentheses needed to silence -Wparentheses. + * *) +let protect_solo_if s = + match s with + | If _ -> Compound [ s ] + | _ -> s + +let protect_ite_if_needed s = + match s with + | IfElse _ when !Options.parentheses -> Compound [ s ] + | _ -> s + +let p_or p x = + match x with + | Some x -> p x + | None -> empty + +let rec p_stmt (s: stmt) = + (* [p_stmt] is responsible for appending [semi] and calling [group]! *) + match s with + | Compound stmts -> + lbrace ^^ nest 2 (hardline ^^ separate_map hardline p_stmt stmts) ^^ + hardline ^^ rbrace + | Expr expr -> + group (p_expr expr ^^ semi) + | Comment s -> + group (string "/*" ^/^ separate break1 (words s) ^/^ string "*/") + | For (decl, e2, e3, stmt) -> + let init = match decl with + | `Decl decl -> p_declaration decl + | `Expr expr -> p_expr expr + | `Skip -> empty + in + group (string "for" ^/^ lparen ^^ nest 2 ( + init ^^ semi ^^ break1 ^^ + p_expr e2 ^^ semi ^^ break1 ^^ + p_expr e3 + ) ^^ rparen) ^^ nest_if p_stmt stmt + | If (e, stmt) -> + group (string "if" ^/^ lparen ^^ p_expr e ^^ rparen) ^^ + nest_if p_stmt (protect_ite_if_needed stmt) + | IfElse (e, s1, s2) -> + group (string "if" ^/^ lparen ^^ p_expr e ^^ rparen) ^^ + nest_if p_stmt (protect_solo_if s1) ^^ hardline ^^ + string "else" ^^ + (match s2 with + | If _ | IfElse _ -> + space ^^ p_stmt s2 + | _ -> + nest_if p_stmt s2) + | While (e, s) -> + group (string "while" ^/^ lparen ^^ p_expr e ^^ rparen) ^^ + nest_if p_stmt s + | Return None -> + group (string "return" ^^ semi) + | Return (Some e) -> + group (string "return" ^^ jump (p_expr e ^^ semi)) + | DeclStmt d -> + group (p_declaration d ^^ semi) + | Switch (e, branches, default) -> + group (string "switch" ^/^ lparen ^^ p_expr e ^^ rparen) ^/^ + braces_with_nesting ( + separate_map hardline (fun (e, s) -> + group (string "case" ^/^ p_expr e ^^ colon) ^^ nest 2 ( + hardline ^^ p_stmt s + ) + ) branches ^^ hardline ^^ + string "default:" ^^ nest 2 ( + hardline ^^ p_stmt default + ) + ) + | Break -> + string "break" ^^ semi + +let p_comments cs = + separate_map hardline (fun c -> string ("/*\n" ^ c ^ "\n*/")) cs ^^ + if List.length cs > 0 then hardline else empty + +let p_decl_or_function (df: declaration_or_function) = + match df with + | Decl (comments, d) -> + p_comments comments ^^ + group (p_declaration d ^^ semi) + | Function (comments, inline, d, stmt) -> + p_comments comments ^^ + let inline = if inline then string "inline" ^^ space else empty in + inline ^^ group (p_declaration d) ^/^ p_stmt stmt + +let print_files = + PrintCommon.print_files p_decl_or_function diff --git a/project/src/kremlin/PrintCommon.ml b/project/src/kremlin/PrintCommon.ml new file mode 100644 index 0000000..84e2b40 --- /dev/null +++ b/project/src/kremlin/PrintCommon.ml @@ -0,0 +1,96 @@ +open PPrint +open Constant +open Common + +let break1 = break 1 + +let jump ?(indent=2) body = + jump indent 1 body + +let parens_with_nesting contents = + surround 2 0 lparen contents rparen + +let braces_with_nesting contents = + surround 2 1 lbrace contents rbrace + +let int i = string (string_of_int i) + +let print_width = function + | UInt8 -> string "uint8" + | UInt16 -> string "uint16" + | UInt32 -> string "uint32" + | UInt64 -> string "uint64" + | CInt -> string "krml_checked_int" + | Int8 -> string "int8" + | Int16 -> string "int16" + | Int32 -> string "int32" + | Int64 -> string "int64" + | Bool -> string "bool" + +let print_constant = function + | w, s -> string s ^^ print_width w + +let print_op = function + | Add -> string "+" + | AddW -> string "+w" + | Sub -> string "-" + | SubW -> string "-" + | Div -> string "/" + | DivW -> string "/w" + | Mult -> string "*" + | MultW -> string "*w" + | Mod -> string "%" + | BOr -> string "|" + | BAnd -> string "&" + | BXor -> string "^" + | BNot -> string "~" + | BShiftL -> string "<<" + | BShiftR -> string ">>" + | Eq -> string "==" + | Neq -> string "!=" + | Lt -> string "<" + | Lte -> string "<=" + | Gt -> string ">" + | Gte -> string ">=" + | And -> string "&&" + | Or -> string "||" + | Xor -> string "^" + | Not -> string "!" + | PostIncr | PreIncr -> string "++" + | PostDecr | PreDecr -> string "--" + | Assign -> string "=" + | Comma -> string "," + +let print_cc = function + | CDecl -> string "__cdecl" + | StdCall -> string "__stdcall" + | FastCall -> string "__fastcall" + +let print_lident (idents, ident) = + separate_map dot string (idents @ [ ident ]) + +let print_program p decls = + separate_map (hardline ^^ hardline) p decls + +let print_files print_decl files = + separate_map hardline (fun (f, p) -> + string (String.uppercase f) ^^ colon ^^ jump (print_program print_decl p) + ) files + +let printf_of_pprint f = + fun buf t -> + PPrint.ToBuffer.compact buf (f t) + +let printf_of_pprint_pretty f = + fun buf t -> + PPrint.ToBuffer.pretty 0.95 Utils.twidth buf (f t) + +let pdoc buf d = + PPrint.ToBuffer.compact buf d + +let english_join s = + match List.rev s with + | [] -> empty + | [ x ] -> x + | last :: first -> + group (separate (comma ^^ break1) (List.rev first) ^/^ string "and" ^/^ last) diff --git a/project/src/kremlin/Utils.ml b/project/src/kremlin/Utils.ml new file mode 100644 index 0000000..1ac1256 --- /dev/null +++ b/project/src/kremlin/Utils.ml @@ -0,0 +1,73 @@ +let try_finally f h = let result = + try + f () + with e -> + h (); + raise e + in + h (); + result + +let with_open_in file_path f = + let c = open_in_bin file_path in + try_finally (fun () -> + f c + ) (fun () -> + close_in c + ) + +let with_open_out file_path f = + let c = open_out file_path in + try_finally (fun () -> + f c + ) (fun () -> + close_out c + ) + + +let cp dst src = with_open_out dst (fun oc -> + with_open_in src (fun ic -> + let buf = Bytes.create 2048 in + while + let l = input ic buf 0 2048 in + if l > 0 then begin + output oc buf 0 l; + true + end else + false + do () done + )) + + +let read ic = + let buf = Buffer.create 4096 in + let s = String.create 2048 in + while begin + let l = input ic s 0 (String.length s) in + if l > 0 then begin + Buffer.add_string buf (String.sub s 0 l); + true + end else begin + false + end + end do () done; + Buffer.contents buf + +let file_get_contents f = + with_open_in f read + +(** Sniff the size of the terminal for optimal use of the width. *) +let theight, twidth = + let height, width = ref 0, ref 0 in + match + Scanf.sscanf (List.hd (Process.read_stdout "stty" [|"size"|])) "%d %d" (fun h w -> + height := h; + width := w); + !height, !width + with + | exception _ -> + 24, 80 + | 0, 0 -> + 24, 80 + | h, w -> + h, w diff --git a/project/src/prologue.h b/project/src/prologue.h new file mode 100644 index 0000000..239161f --- /dev/null +++ b/project/src/prologue.h @@ -0,0 +1,83 @@ +/* A forward declaration of [block] -- see below. */ + +struct block; + +/* -------------------------------------------------------------------------- */ + +/* The type [univ] is the universal type of the values that we manipulate. + A value is either an integer or a pointer to a heap-allocated memory + block. A C union is used to represent this disjunction. There is no tag + to distinguish between the two alternatives! The source program had + better be well-typed. */ + +typedef union { + + /* Either this is an integer... */ + int literal; + + /* ... or this is a pointer to a block. */ + struct block* pointer; + +} univ; + +/* -------------------------------------------------------------------------- */ + +/* The struct [block] describes the uniform layout of a heap-allocated + memory block. A block begins with an integer tag and continues with a + sequence of fields of type [univ]. */ + +struct block { + + /* Every memory block begins with an integer tag. */ + int tag; + + /* Then, we have a sequence of fields, whose number depends on the tag. + This idiom is known in C99 as a flexible array member. */ + univ data[]; + +}; + +/* -------------------------------------------------------------------------- */ + +/* Basic operations on memory blocks. */ + +/* The macro [ALLOC(n)] allocates a block of [n] fields, and is used in a + context where an expression of type [univ] is expected. We use a C + "compound literal" containing a "designated initializer" to indicate + that we wish to choose the "pointer" side of the union. We implicitly + assume that the compiler inserts no padding between the "tag" and "data" + parts of a memory block, so [(n + 1) * sizeof(univ)] is enough space to + hold a block of [n] fields. */ + +#define ALLOC(n) \ + (univ) { .pointer = malloc((n + 1) * sizeof(univ)) } + +/* In the following macros, [u] has type [univ], so [u.pointer] has type + [struct block] and is (or should be) the address of a memory block. + [i] is a field number; the numbering of fields is 0-based. */ + +#define GET_TAG(u) \ + (u.pointer->tag) + +#define SET_TAG(u,t) \ + (u.pointer->tag = t) + +#define GET_FIELD(u,i) \ + (u.pointer->data[i]) + +#define SET_FIELD(u,i,v) \ + (u.pointer->data[i]=v) + +/* -------------------------------------------------------------------------- */ + +/* Basic operations on integers. */ + +/* The macro [FROM_INT(i)] converts the integer [i] to the type [univ]. */ + +#define FROM_INT(i) \ + (univ) { .literal = i } + +/* The macro [TO_INT(u)] converts [u] to the type [int]. */ + +#define TO_INT(u) \ + u.literal diff --git a/project/src/test/.gitignore b/project/src/test/.gitignore new file mode 100644 index 0000000..55b60e0 --- /dev/null +++ b/project/src/test/.gitignore @@ -0,0 +1 @@ +test.native diff --git a/project/src/test/.merlin b/project/src/test/.merlin new file mode 100644 index 0000000..1a1fd25 --- /dev/null +++ b/project/src/test/.merlin @@ -0,0 +1 @@ +B _build diff --git a/project/src/test/Makefile b/project/src/test/Makefile new file mode 100644 index 0000000..d66fadf --- /dev/null +++ b/project/src/test/Makefile @@ -0,0 +1,21 @@ +OCAMLBUILD := ocamlbuild -use-ocamlfind -classic-display +TARGET := test.native +TESTS := ../tests + +.PHONY: all test expected clean + +all: + @ $(OCAMLBUILD) $(TARGET) + +test: all + @ $(MAKE) -C .. + @ ./$(TARGET) --verbosity 1 + +expected: all + @ $(MAKE) -C .. + @ ./$(TARGET) --verbosity 1 --create-expected + +clean: + @ rm -f *~ *.native *.byte + @ $(OCAMLBUILD) -clean + @ rm -f $(TESTS)/*.c $(TESTS)/*.exe $(TESTS)/*.out $(TESTS)/*.err $(TESTS)/*~ diff --git a/project/src/test/_tags b/project/src/test/_tags new file mode 100644 index 0000000..749aa28 --- /dev/null +++ b/project/src/test/_tags @@ -0,0 +1,3 @@ +true: \ + warn(A-44), \ + package(unix) diff --git a/project/src/test/auxiliary.ml b/project/src/test/auxiliary.ml new file mode 100644 index 0000000..a7bd0c4 --- /dev/null +++ b/project/src/test/auxiliary.ml @@ -0,0 +1,138 @@ +type filename = string +type command = string + +let has_suffix suffix name = + Filename.check_suffix name suffix + +let fail format = + Printf.kprintf failwith format + +let try_finally f h = + let result = try f() with e -> h(); raise e in + h(); result + +let with_process_code_result command (f : in_channel -> 'a) : int * 'a = + let ic = Unix.open_process_in command in + set_binary_mode_in ic false; + match f ic with + | exception e -> + ignore (Unix.close_process_in ic); raise e + | result -> + match Unix.close_process_in ic with + | Unix.WEXITED code -> + code, result + | Unix.WSIGNALED _ + | Unix.WSTOPPED _ -> + 99 (* arbitrary *), result + +let with_process_result command (f : in_channel -> 'a) : 'a = + let code, result = with_process_code_result command f in + if code = 0 then + result + else + fail "Command %S failed with exit code %d." command code + +let with_open_in filename (f : in_channel -> 'a) : 'a = + let ic = open_in filename in + try_finally + (fun () -> f ic) + (fun () -> close_in_noerr ic) + +let with_open_out filename (f : out_channel -> 'a) : 'a = + let oc = open_out filename in + try_finally + (fun () -> f oc) + (fun () -> close_out_noerr oc) + +let chunk_size = + 16384 + +let exhaust (ic : in_channel) : string = + let buffer = Buffer.create chunk_size in + let chunk = Bytes.create chunk_size in + let rec loop () = + let length = input ic chunk 0 chunk_size in + if length = 0 then + Buffer.contents buffer + else begin + Buffer.add_subbytes buffer chunk 0 length; + loop() + end + in + loop() + +let read_whole_file filename = + with_open_in filename exhaust + +let absolute_directory (path : string) : string = + try + let pwd = Sys.getcwd() in + Sys.chdir path; + let result = Sys.getcwd() in + Sys.chdir pwd; + result + with Sys_error _ -> + Printf.fprintf stderr "Error: the directory %s does not seem to exist.\n" path; + exit 1 + +let get_number_of_cores () = + try match Sys.os_type with + | "Win32" -> + int_of_string (Sys.getenv "NUMBER_OF_PROCESSORS") + | _ -> + with_process_result "getconf _NPROCESSORS_ONLN" (fun ic -> + let ib = Scanf.Scanning.from_channel ic in + Scanf.bscanf ib "%d" (fun n -> n) + ) + with + | Not_found + | Sys_error _ + | Failure _ + | Scanf.Scan_failure _ + | End_of_file + | Unix.Unix_error _ -> + 1 + +let silent command : command = + command ^ " >/dev/null 2>/dev/null" + +(* [groups eq xs] segments the list [xs] into a list of groups, where several + consecutive elements belong in the same group if they are equivalent in the + sense of the function [eq]. *) + +(* The auxiliary function [groups1] deals with the case where we have an open + group [group] of which [x] is a member. The auxiliary function [group0] + deals with the case where we have no open group. [groups] is the list of + closed groups found so far, and [ys] is the list of elements that remain to + be examined. *) + +let rec groups1 eq groups x group ys = + match ys with + | [] -> + group :: groups + | y :: ys -> + if eq x y then + groups1 eq groups x (y :: group) ys + else + groups0 eq (List.rev group :: groups) (y :: ys) + +and groups0 eq groups ys = + match ys with + | [] -> + groups + | y :: ys -> + groups1 eq groups y [y] ys + +let groups eq (xs : 'a list) : 'a list list = + List.rev (groups0 eq [] xs) + +(* [sep ss] separates the nonempty strings in the list [ss] with a space, + and concatenates everything, producing a single string. *) + +let sep (ss : string list) : string = + let ss = List.filter (fun s -> String.length s > 0) ss in + match ss with + | [] -> + "" + | s :: ss -> + List.fold_left (fun s1 s2 -> s1 ^ " " ^ s2) s ss diff --git a/project/src/test/test.ml b/project/src/test/test.ml new file mode 100644 index 0000000..fe98195 --- /dev/null +++ b/project/src/test/test.ml @@ -0,0 +1,288 @@ +open Sys +open Array +open List +open Filename +open Printf +open Auxiliary + +(* -------------------------------------------------------------------------- *) + +(* Settings. *) + +let create_expected = + ref false + +let verbosity = + ref 0 + +let usage = + sprintf "Usage: %s\n" argv.(0) + +let spec = Arg.align [ + "--create-expected", Arg.Set create_expected, + " recreate the expected-output files"; + "--verbosity", Arg.Int ((:=) verbosity), + " set the verbosity level (0-2)"; +] + +let () = + Arg.parse spec (fun _ -> ()) usage + +let create_expected = + !create_expected + +let verbosity = + !verbosity + +(* -------------------------------------------------------------------------- *) + +(* Logging. *) + +(* 0 is minimal verbosity; + 1 shows some progress messages; + 2 is maximal verbosity. *) + +let log level format = + kprintf (fun s -> + if level <= verbosity then + print_string s + ) format + +(* Extend [fail] to display an information message along the way. + The message is immediately emitted by the worker, depending on + the verbosity level, whereas the failure message is sent back + to the master. *) + +let fail id format = + log 1 "[FAIL] %s\n%!" id; + fail format + +(* When issuing an external command, log it along the way. *) + +let command cmd = + log 2 "%s\n%!" cmd; + command cmd + +(* -------------------------------------------------------------------------- *) + +(* Paths. *) + +let root = + absolute_directory ".." + +let src = + root + +let good = + root ^ "/tests" + +let good_slash filename = + good ^ "/" ^ filename + +let joujou = + src ^ "/joujou" + +let gcc = + sprintf "gcc -O2 -I %s" src + +(* -------------------------------------------------------------------------- *) + +(* Test files. *) + +let thisfile = + "this input file" + +let lambda basename = + basename ^ ".lambda" + +(* -------------------------------------------------------------------------- *) + +(* Test inputs and outputs. *) + +(* A test input is a basename, without the .lambda extension. *) + +type input = + | PositiveTest of filename + +type inputs = input list + +let print_input = function + | PositiveTest basename -> + basename + +type outcome = + | OK + | Fail of string (* message *) + +let print_outcome = function + | OK -> + "" + | Fail msg -> + msg + +type output = + input * outcome + +type outputs = output list + +let print_output (input, outcome) = + printf "\n[FAIL] %s\n%s" + (print_input input) + (print_outcome outcome) + +(* -------------------------------------------------------------------------- *) + +(* Auxiliary functions. *) + +let check_expected directory id result expected = + let cmd = sep ["cd"; directory; "&&"; "cp"; "-f"; result; expected] in + let copy() = + if command cmd <> 0 then + fail id "Failed to create %s.\n" expected + in + (* If we are supposed to create the [expected] file, do so. *) + if create_expected then + copy() + (* Otherwise, check that the file [expected] exists. If it does not exist, + create it by renaming [result] to [expected], then fail and invite the + user to review the newly created file. *) + else if not (file_exists (directory ^ "/" ^ expected)) then begin + copy(); + let cmd = sep ["more"; directory ^ "/" ^ expected] in + fail id "The file %s did not exist.\n\ + I have just created it. Please review it.\n %s\n" + expected cmd + end + +(* -------------------------------------------------------------------------- *) + +(* Running a positive test. *) + +(* + Conventions: + The file %.c stores the output of joujou. + The file %.exe is the compiled program produced by gcc. + The file %.out stores the output of the compiled program. + The file %.exp stores the expected output. + The file %.err stores error output (at several stages). + *) + +let process_positive_test (base : string) : unit = + + (* Display an information message. *) + log 2 "Testing %s...\n%!" base; + + (* A suggested command. *) + let more filename = + sep ["more"; good_slash filename] + in + + (* Run joujou. *) + let source = lambda base in + let c = base ^ ".c" in + let errors = base ^ ".err" in + let cmd = sep ( + "cd" :: good :: "&&" :: + joujou :: source :: sprintf ">%s" c :: sprintf "2>%s" errors :: [] + ) in + if command cmd <> 0 then + fail base "joujou fails on this source file.\n\ + To see the source file:\n %s\n\ + To see the error messages:\n %s\n" + (more source) (more errors); + + (* Run the C compiler. *) + let binary = base ^ ".exe" in + let errors = base ^ ".err" in + let cmd = sep ( + "cd" :: good :: "&&" :: + gcc :: c :: "-o" :: binary :: sprintf "2>%s" errors :: [] + ) in + if command cmd <> 0 then + fail base "The C compiler rejects the C file.\n\ + To see the C file:\n %s\n\ + To see the compiler's error messages:\n %s\n" + (more c) (more errors); + + (* Run the compiled program. *) + let out = base ^ ".out" in + let cmd = sep ( + "cd" :: good :: "&&" :: + sprintf "./%s" binary :: sprintf ">%s" out :: "2>&1" :: [] + ) in + if command cmd <> 0 then + fail base "The compiled program fails.\n\ + To see its output:\n %s\n" (more out); + + (* Check that the file [exp] exists. *) + let exp = base ^ ".exp" in + check_expected good base out exp; + + (* Check that the output coincides with what was expected. *) + let cmd = sep ("cd" :: good :: "&&" :: "diff" :: exp :: out :: []) in + if command (silent cmd) <> 0 then + fail base "joujou accepts this input file, but produces incorrect output.\n\ + To see the difference:\n (%s)\n" + cmd; + + (* Succeed. *) + log 1 "[OK] %s\n%!" base + +(* -------------------------------------------------------------------------- *) + +(* Running a test. *) + +let process input : output = + try + begin match input with + | PositiveTest basenames -> + process_positive_test basenames + end; + input, OK + with Failure msg -> + input, Fail msg + +(* -------------------------------------------------------------------------- *) + +(* [run] runs a bunch of tests in sequence. *) + +let run (inputs : inputs) : outputs = + flush stdout; flush stderr; + let outputs = map process inputs in + outputs + +(* -------------------------------------------------------------------------- *) + +(* Main. *) + +(* Menhir can accept several .mly files at once. By convention, if several + files have the same name up to a numeric suffix, then they belong in a + single group and should be fed together to Menhir. *) + +let inputs directory : filename list = + readdir directory + |> to_list + |> filter (has_suffix ".lambda") + |> map chop_extension + |> sort compare + +let positive : inputs = + inputs good + |> map (fun basename -> PositiveTest basename) + +let inputs = + positive + +let outputs : outputs = + printf "Preparing to run %d tests...\n%!" (length inputs); + run inputs + +let successful, failed = + partition (fun (_, o) -> o = OK) outputs + +let () = + printf "%d out of %d tests are successful.\n" + (length successful) (length inputs); + failed |> iter (fun (input, outcome) -> + printf "\n[FAIL] %s\n%s" (print_input input) (print_outcome outcome) + ) diff --git a/project/src/tests/.gitignore b/project/src/tests/.gitignore new file mode 100644 index 0000000..9038d29 --- /dev/null +++ b/project/src/tests/.gitignore @@ -0,0 +1,4 @@ +*.err +*.out +*.exe +*.c diff --git a/project/src/tests/bool.exp b/project/src/tests/bool.exp new file mode 100644 index 0000000..829400e --- /dev/null +++ b/project/src/tests/bool.exp @@ -0,0 +1,20 @@ +1 +0 +1 +0 +0 +24 +120 +8 +13 +5 +0 +1 +7 +1 +1 +2 +6 +24 +120 +42 diff --git a/project/src/tests/bool.lambda b/project/src/tests/bool.lambda new file mode 100644 index 0000000..6eaf5da --- /dev/null +++ b/project/src/tests/bool.lambda @@ -0,0 +1,88 @@ +(* Thunks. *) +let force = fun x -> x 0 in +(* Church Booleans. *) +let false = fun x -> fun y -> y in +let true = fun x -> fun y -> x in +let k = true in +let cond = fun p -> fun x -> fun y -> p x y in +let forcecond = fun p -> fun x -> fun y -> force (p x y) in +let bool2int = fun b -> cond b 1 0 in +let _ = print (bool2int true) in +let _ = print (bool2int false) in +(* Church pairs. *) +let pair = fun x -> fun y -> fun p -> cond p x y in +let fst = fun p -> p true in +let snd = fun p -> p false in +(* Church naturals. *) +let zero = fun f -> fun x -> x in +let one = fun f -> fun x -> f x in +let plus = fun m -> fun n -> fun f -> fun x -> m f (n f x) in +let two = plus one one in +let three = plus one two in +let four = plus two two in +let five = plus four one in +let six = plus four two in +let succ = plus one in +let mult = fun m -> fun n -> fun f -> m (n f) in +let exp = fun m -> fun n -> n m in +let is_zero = fun n -> n (k false) true in +let convert = fun n -> n (fun x -> x + 1) 0 in +let _ = print (bool2int (is_zero zero)) in +let _ = print (bool2int (is_zero one)) in +let _ = print (bool2int (is_zero two)) in +(* Factorial, based on Church naturals. *) +let loop = fun p -> + let n = succ (fst p) in + pair n (mult n (snd p)) +in +let fact = fun n -> + snd (n loop (pair zero one)) +in +let _ = print (convert (fact four)) in +let _ = print (convert (fact five)) in +(* Fibonacci, based on Church naturals. *) +let loop = fun p -> + let fib1 = fst p in + pair (plus fib1 (snd p)) fib1 +in +let fib = fun n -> + snd (n loop (pair one one)) +in +let _ = print (convert (fib five)) in +let _ = print (convert (fib six)) in +(* Predecessor. *) +let loop = fun p -> + let pred = fst p in + pair (succ pred) pred +in +let pred = fun n -> + snd (n loop (pair zero zero)) +in +let _ = print (convert (pred six)) in +(* Comparison, yielding a Church Boolean. *) +let geq = fun m -> fun n -> + m pred n (k false) true +in +let _ = print (bool2int (geq four six)) in +let _ = print (bool2int (geq six one)) in +(* Iteration. *) +let iter = fun f -> fun n -> + n f (f one) +in +(* Ackermann's function. *) +let ack = fun n -> n iter succ n in +let _ = print (convert (ack two)) in +(* A fixpoint. *) +let fix = fun f -> (fun y -> f (fun z -> y y z)) (fun y -> f (fun z -> y y z)) in +(* Another version of fact. *) +let fact = fun n -> + fix (fun fact -> fun n -> forcecond (is_zero n) (fun _ -> one) (fun _ -> mult n (fact (pred n)))) n +in +let _ = print (convert (fact zero)) in +let _ = print (convert (fact one)) in +let _ = print (convert (fact two)) in +let _ = print (convert (fact three)) in +let _ = print (convert (fact four)) in +let _ = print (convert (fact five)) in + +print 42 diff --git a/project/src/tests/church.exp b/project/src/tests/church.exp new file mode 100755 index 0000000..d81cc07 --- /dev/null +++ b/project/src/tests/church.exp @@ -0,0 +1 @@ +42 diff --git a/project/src/tests/church.lambda b/project/src/tests/church.lambda new file mode 100644 index 0000000..0cbd2e6 --- /dev/null +++ b/project/src/tests/church.lambda @@ -0,0 +1,16 @@ +let i = fun x -> x in +let k = fun x -> fun y -> x in +let zero = fun f -> i in +let one = fun f -> fun x -> f x in +let plus = fun m -> fun n -> fun f -> fun x -> m f (n f x) in +let succ = plus one in +let mult = fun m -> fun n -> fun f -> m (n f) in +let exp = fun m -> fun n -> n m in +let two = succ one in +let three = succ two in +let six = mult two three in +let seven = succ six in +let twenty_one = mult three seven in +let forty_two = mult two twenty_one in +let convert = fun n -> n (fun x -> x + 1) 0 in +print (convert forty_two) diff --git a/project/src/tests/hello.exp b/project/src/tests/hello.exp new file mode 100755 index 0000000..d81cc07 --- /dev/null +++ b/project/src/tests/hello.exp @@ -0,0 +1 @@ +42 diff --git a/project/src/tests/hello.lambda b/project/src/tests/hello.lambda new file mode 100644 index 0000000..86f387e --- /dev/null +++ b/project/src/tests/hello.lambda @@ -0,0 +1 @@ +print (21 + 21) diff --git a/project/src/tests/loop/loop.lambda b/project/src/tests/loop/loop.lambda new file mode 100644 index 0000000..0629593 --- /dev/null +++ b/project/src/tests/loop/loop.lambda @@ -0,0 +1,10 @@ +(* This program is supposed to never terminate. + This can actually work if the C compiler is + smart enough to recognize and optimize tail + calls. It works for me with clang but not with + gcc, for some reason. *) +let rec loop = fun x -> + let _ = print x in + loop (x + 1) +in +loop 0 diff --git a/project/src/tests/printprint.exp b/project/src/tests/printprint.exp new file mode 100755 index 0000000..daaac9e --- /dev/null +++ b/project/src/tests/printprint.exp @@ -0,0 +1,2 @@ +42 +42 diff --git a/project/src/tests/printprint.lambda b/project/src/tests/printprint.lambda new file mode 100644 index 0000000..f0501dd --- /dev/null +++ b/project/src/tests/printprint.lambda @@ -0,0 +1,2 @@ +(* Because [print] returns its argument, this program should print 42 twice. *) +print (print 42) diff --git a/project/sujet.pdf b/project/sujet.pdf new file mode 100644 index 0000000..b7f0a6d Binary files /dev/null and b/project/sujet.pdf differ diff --git a/slides/yrg-00-introduction.pdf b/slides/yrg-00-introduction.pdf new file mode 100644 index 0000000..d232d8e Binary files /dev/null and b/slides/yrg-00-introduction.pdf differ diff --git a/slides/yrg-01-type-inference.pdf b/slides/yrg-01-type-inference.pdf new file mode 100644 index 0000000..c3a22ea Binary files /dev/null and b/slides/yrg-01-type-inference.pdf differ