diff --git a/projet/src/.merlin b/projet/src/.merlin new file mode 100644 index 0000000..7de32a3 --- /dev/null +++ b/projet/src/.merlin @@ -0,0 +1,9 @@ +S kremlin +S alphalib +B _build +B _build/kremlin +B _build/alphalib +PKG unix +PKG process +PKG pprint +PKG ppx_deriving.std diff --git a/projet/src/CPS.ml b/projet/src/CPS.ml new file mode 100644 index 0000000..9b3d87d --- /dev/null +++ b/projet/src/CPS.ml @@ -0,0 +1,7 @@ +(* The source calculus. *) +module S = Lambda +(* The target calculus. *) +module T = Tail + +let cps_term (t : S.term) : T.term = + assert false diff --git a/projet/src/CPS.mli b/projet/src/CPS.mli new file mode 100644 index 0000000..2497ddd --- /dev/null +++ b/projet/src/CPS.mli @@ -0,0 +1,4 @@ +(* Through a CPS transformation, the surface language [Lambda] is translated + down to the intermediate language [Tail]. *) + +val cps_term: Lambda.term -> Tail.term diff --git a/projet/src/Cook.ml b/projet/src/Cook.ml new file mode 100644 index 0000000..89657ed --- /dev/null +++ b/projet/src/Cook.ml @@ -0,0 +1,54 @@ +open Error + +(* The source calculus. *) +module S = RawLambda +(* The target calculus. *) +module T = Lambda + +(* Environments map strings to atoms. *) +module Env = + Map.Make(String) + +(* [bind env x] creates a fresh atom [a] and extends the environment [env] + with a mapping of [x] to [a]. *) +let bind env x = + let a = Atom.fresh x in + Env.add x a env, a + +let rec cook_term env { S.place; S.value } = + match value with + | S.Var x -> + begin try + T.Var (Env.find x env) + with Not_found -> + error place "Unbound variable: %s" x + end + | S.Lam (x, t) -> + let env, x = bind env x in + T.Lam (T.NoSelf, x, cook_term env t) + | S.App (t1, t2) -> + T.App (cook_term env t1, cook_term env t2) + | S.Lit i -> + T.Lit i + | S.BinOp (t1, op, t2) -> + T.BinOp (cook_term env t1, op, cook_term env t2) + | S.Print t -> + T.Print (cook_term env t) + | S.Let (S.NonRecursive, x, t1, t2) -> + let t1 = cook_term env t1 in + let env, x = bind env x in + let t2 = cook_term env t2 in + T.Let (x, t1, t2) + | S.Let (S.Recursive, f, { S.value = S.Lam (x, t1); _ }, t2) -> + let env, f = bind env f in + let x, t1 = + let env, x = bind env x in + x, cook_term env t1 + in + let t2 = cook_term env t2 in + T.Let (f, T.Lam (T.Self f, x, t1), t2) + | S.Let (S.Recursive, _, { S.place; _ }, _) -> + error place "the right-hand side of 'let rec' must be a lambda-abstraction" + +let cook_term t = + cook_term Env.empty t diff --git a/projet/src/Cook.mli b/projet/src/Cook.mli new file mode 100644 index 0000000..617e408 --- /dev/null +++ b/projet/src/Cook.mli @@ -0,0 +1,15 @@ +(* This module translates [RawLambda] into [Lambda]. *) + +(* This involves ensuring that every name is properly bound (otherwise, an + error is reported) and switching from a representation of names as strings + to a representation of names as atoms. *) + +(* This also involves checking that the right-hand side of every [let] + construct is a function (otherwise, an error is reported) and switching + from a representation where [let] constructs can carry a [rec] annotation + to a representation where functions can carry such an annotation. *) + +(* This also involves dropping places (that is, source code locations), since + they are no longer used after this phase. *) + +val cook_term: RawLambda.term -> Lambda.term diff --git a/projet/src/Defun.ml b/projet/src/Defun.ml new file mode 100644 index 0000000..7264dac --- /dev/null +++ b/projet/src/Defun.ml @@ -0,0 +1,7 @@ +(* The source calculus. *) +module S = Tail +(* The target calculus. *) +module T = Top + +let defun_term (t : S.term) : T.program = + assert false diff --git a/projet/src/Defun.mli b/projet/src/Defun.mli new file mode 100644 index 0000000..0c02815 --- /dev/null +++ b/projet/src/Defun.mli @@ -0,0 +1,4 @@ +(* Through defunctionalization, the intermediate language [Tail] is translated + down to the next intermediate language, [Top]. *) + +val defun_term: Tail.term -> Top.program diff --git a/projet/src/Error.ml b/projet/src/Error.ml new file mode 100644 index 0000000..e4f1d8b --- /dev/null +++ b/projet/src/Error.ml @@ -0,0 +1,40 @@ +open Lexing + +type place = + position * position + +let place lexbuf : place = + lexbuf.lex_start_p, lexbuf.lex_curr_p + +let line p : int = + p.pos_lnum + +let column p : int = + p.pos_cnum - p.pos_bol + +let show place : string = + let startp, endp = place in + Printf.sprintf "File \"%s\", line %d, characters %d-%d" + startp.pos_fname + (line startp) + (column startp) + (endp.pos_cnum - startp.pos_bol) (* intentionally [startp.pos_bol] *) + +let display continuation header place format = + Printf.fprintf stderr "%s:\n" (show place); + Printf.kfprintf + continuation + stderr + (header ^^ format ^^ "\n%!") + +let error place format = + display + (fun _ -> exit 1) + "Error: " + place format + +let set_filename lexbuf filename = + lexbuf.lex_curr_p <- { lexbuf.lex_curr_p with pos_fname = filename } + +let pp_place formatter _place = + Format.fprintf formatter "<>" diff --git a/projet/src/Error.mli b/projet/src/Error.mli new file mode 100644 index 0000000..b8ba5f9 --- /dev/null +++ b/projet/src/Error.mli @@ -0,0 +1,30 @@ +open Lexing + +(* A place is a pair of a start position and an end position. *) + +type place = + position * position + +(* [set_filename lexbuf filename] updates [lexbuf] to record the + fact that the current file name is [filename]. This file name + is later used in error messages. *) + +val set_filename: lexbuf -> string -> unit + +(* [place lexbuf] produces a pair of the current token's start and + end positions. This function is useful when reporting an error + during lexing. *) + +val place: lexbuf -> place + +(* [error place format ...] displays an error message and exits. + The error message is located at [place]. The error message + is composed based on [format] and the extra arguments [...]. *) + +val error: place -> ('a, out_channel, unit, 'b) format4 -> 'a + +(* [pp_place formatter place] prints a place. It is used by + [@@deriving show] for data structures that contain places. + As of now, it prints nothing. *) + +val pp_place: Format.formatter -> place -> unit diff --git a/projet/src/Finish.ml b/projet/src/Finish.ml new file mode 100644 index 0000000..08779b3 --- /dev/null +++ b/projet/src/Finish.ml @@ -0,0 +1,323 @@ +(* The source calculus. *) +module S = Top +(* The target calculus. *) +module T = C + +(* -------------------------------------------------------------------------- *) + +(* [interval i j f] constructs the list [[f i; f (i + 1); ...; f (j - 1)]]. *) + +let rec interval i j (f : int -> 'a) : 'a list = + if i < j then + f i :: interval (i + 1) j f + else + [] + +(* -------------------------------------------------------------------------- *) + +(* [index xs] constructs a list of pairs, where each element of [xs] is paired + with its index. Indices are 0-based. *) + +let index (xs : 'a list) : (int * 'a) list = + let n = List.length xs in + let indices = interval 0 n (fun i -> i) in + List.combine indices xs + +(* -------------------------------------------------------------------------- *) + +(* The number of fields of a block, not counting its tag. *) + +let block_num_fields b = + match b with + | S.Con (_, vs) -> + List.length vs + +(* -------------------------------------------------------------------------- *) + +(* A simple-minded way of ensuring that every atom is printed as a + distinct string is to concatenate the atom's hint and identity, + with an underscore in between. This is guaranteed to rule out + collisions. *) + +let var (x : S.variable) : T.ident = + Printf.sprintf "%s_%d" (Atom.hint x) (Atom.identity x) + +let evar (x : S.variable) : T.expr = + T.Name (var x) + +(* -------------------------------------------------------------------------- *) + +(* Predefined C types and functions. *) + +(* A universal type: every value is translated to a C value of type [univ]. + This is a union type (i.e., an untagged sum) of integers and pointers to + memory blocks. *) + +let univ : T.type_spec = + T.Named "univ" + +(* The type of integers. *) + +let int : T.type_spec = + T.Named "int" + +(* The type [char] appears in the type of [main]. *) + +let char : T.type_spec = + T.Named "char" + +let answer : T.type_spec = + int + (* Our functions never actually return, since they are tail recursive. + We use [int] as their return type, since this is the return type of + [main]. *) + +let exit : T.expr = + T.Name "exit" + +let printf : T.expr = + T.Name "printf" + +(* -------------------------------------------------------------------------- *) + +(* [declare x init] constructs a local variable declaration for a variable [x] + of type [univ]. [x] is optionally initialized according to [init]. *) + +let declare (x : S.variable) (init : T.init option) : T.declaration = + univ, None, [ T.Ident (var x), init ] + +(* -------------------------------------------------------------------------- *) + +(* Macro invocations. *) + +let macro m es : T.expr = + (* We disguise a macro invocation as a procedure call. *) + T.Call (T.Name m, es) + +(* -------------------------------------------------------------------------- *) + +(* Integer literals; conversions between [univ] and [int]. *) + +let iconst i : T.expr = + T.Constant (Constant.Int64, string_of_int i) + +let to_int v : T.expr = + macro "TO_INT" [ v ] + (* This is an unsafe conversion, of course. *) + +let from_int v : T.expr = + macro "FROM_INT" [ v ] + +(* -------------------------------------------------------------------------- *) + +(* The translation of values. *) + +let finish_op = function + | S.OpAdd -> + T.K.Add + | S.OpSub -> + T.K.Sub + | S.OpMul -> + T.K.Mult + | S.OpDiv -> + T.K.Div + +let rec finish_value (v : S.value) : T.expr = + match v with + | S.VVar x -> + evar x + | S.VLit i -> + from_int (iconst i) + | S.VBinOp (v1, op, v2) -> + from_int ( + T.Op2 ( + finish_op op, + to_int (finish_value v1), + to_int (finish_value v2) + ) + ) + +let finish_values vs = + List.map finish_value vs + +(* -------------------------------------------------------------------------- *) + +(* A macro for allocating a memory block. *) + +let alloc b : T.expr = + T.Call (T.Name "ALLOC", [ iconst (block_num_fields b) ]) + +(* -------------------------------------------------------------------------- *) + +(* Macros for reading and initializing the tag of a memory block. *) + +let read_tag (v : S.value) : T.expr = + macro "GET_TAG" [ finish_value v ] + +let set_tag (x : S.variable) (tag : S.tag) : T.stmt = + T.Expr (macro "SET_TAG" [ evar x; iconst tag ]) + +(* -------------------------------------------------------------------------- *) + +(* Macros for reading and setting a field in a memory block. *) + +let read_field (v : S.value) (i : int) : T.expr = + (* [i] is a 0-based field index. *) + macro "GET_FIELD" [ finish_value v; iconst i ] + +let read_field (v : S.value) (i, x) (t : T.stmt list) : T.stmt list = + (* [x] is a variable, which is declared and initialized with + the content of the [i]th field of the block [v]. *) + T.DeclStmt (declare x (Some (T.InitExpr (read_field v i)))) :: + t + +let read_fields (v : S.value) xs (t : T.stmt list) : T.stmt list = + (* [xs] are variables, which are declared and initialized with + the contents of the fields of the block [v]. *) + List.fold_right (read_field v) (index xs) t + +let set_field x i (v : S.value) : T.stmt = + T.Expr (macro "SET_FIELD" [ evar x; iconst i; finish_value v ]) + +(* -------------------------------------------------------------------------- *) + +(* A sequence of instructions for initializing a memory block. *) + +let init_block (x : S.variable) (b : S.block) : T.stmt list = + match b with + | S.Con (tag, vs) -> + T.Comment "Initializing a memory block:" :: + set_tag x tag :: + List.mapi (set_field x) vs + +(* -------------------------------------------------------------------------- *) + +(* Function calls, as expressions and as statements. *) + +let ecall f args : T.expr = + T.Call (f, args) + +let scall f args : T.stmt = + T.Expr (ecall f args) + +(* -------------------------------------------------------------------------- *) + +(* The translation of terms. *) + +let rec finish_term (t : S.term) : C.stmt = + match t with + | S.Exit -> + T.Compound [ + scall exit [ iconst 0 ] + ] + | S.TailCall (f, vs) -> + T.Return (Some (ecall (evar f) (finish_values vs))) + | S.Print (v, t) -> + T.Compound [ + scall printf [ T.Literal "%d\\n"; to_int (finish_value v) ]; + finish_term t + ] + | S.LetVal (x, v1, t2) -> + T.Compound [ + T.DeclStmt (declare x (Some (T.InitExpr (finish_value v1)))); + finish_term t2 + ] + | S.LetBlo (x, b1, t2) -> + T.Compound ( + T.DeclStmt (declare x (Some (T.InitExpr (alloc b1)))) :: + init_block x b1 @ + [ finish_term t2 ] + ) + | S.Swi (v, bs) -> + T.Switch ( + read_tag v, + finish_branches v bs, + default + ) + +and default : T.stmt = + (* This default [switch] branch should never be taken. *) + T.Compound [ + scall printf [ T.Literal "Oops! A nonexistent case has been taken.\\n" ]; + scall exit [ iconst 42 ]; + ] + +and finish_branches v bs = + List.map (finish_branch v) bs + +and finish_branch v (S.Branch (tag, xs, t)) : T.expr * T.stmt = + iconst tag, + T.Compound (read_fields v xs [finish_term t]) + +(* -------------------------------------------------------------------------- *) + +(* Function declarations. *) + +(* We distinguish the function [main], whose type is imposed by the C standard, + and ordinary functions, whose parameters have type [univ]. *) + +(* A parameter of an ordinary function has type [univ]. *) + +let param (x : S.variable) : T.param = + univ, T.Ident (var x) + +(* A declaration of an ordinary function. *) + +let declare_ordinary_function f xs : T.declaration = + answer, None, [ T.Function (None, T.Ident (var f), List.map param xs), None ] + +(* The declaration of the main function. *) + +let declare_main_function : T.declaration = + let params = [ + int, T.Ident "argc"; + char, T.Pointer (T.Pointer (T.Ident "argv")) + ] in + int, None, [ T.Function (None, T.Ident "main", params), None ] + +(* -------------------------------------------------------------------------- *) + +(* A function definition. *) + +type decl_or_fun = + T.declaration_or_function + +let define (decl : T.declaration) (t : S.term) : decl_or_fun = + T.Function ( + [], (* no comments *) + false, (* not inlined *) + decl, + T.Compound [finish_term t] + ) + +let define_ordinary_function (S.Fun (f, xs, t)) : decl_or_fun = + define (declare_ordinary_function f xs) t + +let define_main_function (t : S.term) : decl_or_fun = + define declare_main_function t + +(* -------------------------------------------------------------------------- *) + +(* Because all functions are mutually recursive, their definitions must be + preceded with their prototypes. *) + +let prototype (f : decl_or_fun) : decl_or_fun = + match f with + | T.Function (_, _, declaration, _) -> + T.Decl ([], declaration) + | T.Decl _ -> + assert false + +let prototypes (fs : decl_or_fun list) : decl_or_fun list = + List.map prototype fs @ + fs + +(* -------------------------------------------------------------------------- *) + +(* The translation of a complete program. *) + +let finish_program (S.Prog (decls, main) : S.program) : T.program = + prototypes ( + define_main_function main :: + List.map define_ordinary_function decls + ) diff --git a/projet/src/Finish.mli b/projet/src/Finish.mli new file mode 100644 index 0000000..6785984 --- /dev/null +++ b/projet/src/Finish.mli @@ -0,0 +1,5 @@ +(* This function implements a translation of the intermediate language [Top] + down to [C]. This transformation is mostly a matter of choosing appropriate + C constructs to reflect the concepts of the language [Top]. *) + +val finish_program: Top.program -> C.program diff --git a/projet/src/Lambda.ml b/projet/src/Lambda.ml new file mode 100644 index 0000000..fdb9ecb --- /dev/null +++ b/projet/src/Lambda.ml @@ -0,0 +1,48 @@ +(* This language is the untyped lambda-calculus, extended with recursive + lambda-abstractions, nonrecursive [let] bindings, integer literals and + integer arithmetic operations, and the primitive operation [print]. *) + +(* This language is really the same language as [RawLambda], with the + following internal differences: + + 1. Instead of recursive [let] bindings, the language has recursive + lambda-abstractions. A [let rec] definition whose right-hand side is not + a lambda-abstraction is rejected during the translation of [RawLambda] + to [Lambda]. + + 2. Variables are represented by atoms (instead of strings). A term with an + unbound variable is rejected during the translation of [RawLambda] to + [Lambda]. + + 3. Terms are no longer annotated with places. *) + +(* Variables are atoms. *) + +type variable = + Atom.atom + + (* Every lambda-abstraction is marked recursive or nonrecursive. Whereas a + nonrecursive lambda-abstraction [fun x -> t] binds one variable [x], a + recursive lambda-abstraction [fix f. fun x -> t] binds two variables [f] + and [x]. The variable [f] is a self-reference. *) + +and self = + | Self of variable + | NoSelf + +and binop = RawLambda.binop = + | OpAdd + | OpSub + | OpMul + | OpDiv + +and term = + | Var of variable + | Lam of self * variable * term + | App of term * term + | Lit of int + | BinOp of term * binop * term + | Print of term + | Let of variable * term * term + +[@@deriving show { with_path = false }] diff --git a/projet/src/Lexer.mll b/projet/src/Lexer.mll new file mode 100644 index 0000000..ba016f0 --- /dev/null +++ b/projet/src/Lexer.mll @@ -0,0 +1,97 @@ +{ + +open Lexing +open Error +open Parser +open RawLambda + +} + +(* -------------------------------------------------------------------------- *) + +(* Regular expressions. *) + +let newline = + ('\010' | '\013' | "\013\010") + +let whitespace = + [ ' ' '\t' ] + +let lowercase = + ['a'-'z' '\223'-'\246' '\248'-'\255' '_'] + +let uppercase = + ['A'-'Z' '\192'-'\214' '\216'-'\222'] + +let identchar = + ['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' '0'-'9'] + +let digit = + ['0'-'9'] + +(* -------------------------------------------------------------------------- *) + +(* The lexer. *) + +rule entry = parse +| "fun" + { FUN } +| "in" + { IN } +| "let" + { LET } +| "print" + { PRINT } +| "rec" + { REC } +| "->" + { ARROW } +| "=" + { EQ } +| "(" + { LPAREN } +| ")" + { RPAREN } +| "+" + { ADDOP OpAdd } +| "-" + { ADDOP OpSub } +| "*" + { MULOP OpMul } +| "/" + { MULOP OpDiv } +| (lowercase identchar *) as x + { IDENT x } +| digit+ as i + { try + INTLITERAL (int_of_string i) + with Failure _ -> + error (place lexbuf) "invalid integer literal." } +| "(*" + { ocamlcomment (place lexbuf) lexbuf; entry lexbuf } +| newline + { new_line lexbuf; entry lexbuf } +| whitespace+ + { entry lexbuf } +| eof + { EOF } +| _ as c + { error (place lexbuf) "unexpected character: '%c'." c } + +(* ------------------------------------------------------------------------ *) + + (* Skip OCaml-style comments. Comments can be nested. This sub-lexer is + parameterized with the place of the opening comment, so if an unterminated + comment is detected, we can show where it was opened. *) + +and ocamlcomment p = parse +| "*)" + { () } +| "(*" + { ocamlcomment (place lexbuf) lexbuf; ocamlcomment p lexbuf } +| newline + { new_line lexbuf; ocamlcomment p lexbuf } +| eof + { error p "unterminated comment." } +| _ + { ocamlcomment p lexbuf } diff --git a/projet/src/Main.ml b/projet/src/Main.ml new file mode 100644 index 0000000..9c8eed4 --- /dev/null +++ b/projet/src/Main.ml @@ -0,0 +1,101 @@ +(* -------------------------------------------------------------------------- *) + +(* Parse the command line. *) + +let debug = + ref false + +let filenames = + ref [] + +let record filename = + filenames := filename :: !filenames + +let options = + Arg.align [ + "--debug", Arg.Set debug, " Enable debugging output"; + ] + +let usage = + Printf.sprintf "Usage: %s " Sys.argv.(0) + +let () = + Arg.parse options record usage + +let debug = + !debug + +let filenames = + List.rev !filenames + +(* -------------------------------------------------------------------------- *) + +(* Printing a syntax tree in an intermediate language (for debugging). *) + +let print_delimiter () = + Printf.eprintf "----------------------------------------"; + Printf.eprintf "----------------------------------------\n" + +let dump (phase : string) (show : 'term -> string) (t : 'term) = + if debug then begin + print_delimiter(); + Printf.eprintf "%s:\n\n%s\n\n%!" phase (show t) + end; + t + +(* -------------------------------------------------------------------------- *) + +(* Reading and parsing a file. *) + +let read filename : RawLambda.term = + try + let contents = Utils.file_get_contents filename in + let lexbuf = Lexing.from_string contents in + Error.set_filename lexbuf filename; + try + Parser.entry Lexer.entry lexbuf + with + | Parser.Error -> + Error.error (Error.place lexbuf) "Syntax error." + with + | Sys_error msg -> + prerr_endline msg; + exit 1 + +(* -------------------------------------------------------------------------- *) + +(* Printing the final C program on the standard output channel. *) + +let output (p : C.program) : unit = + Printf.printf "#include\n"; + Printf.printf "#include\n"; + Printf.printf "#include \"prologue.h\"\n\n"; + let print_program = PrintCommon.print_program PrintC.p_decl_or_function in + let buf = Buffer.create 1024 in + PrintCommon.printf_of_pprint_pretty print_program buf p; + print_endline (Buffer.contents buf) + +(* -------------------------------------------------------------------------- *) + +(* The complete processing pipeline. Beautiful, isn't it? *) + +let process filename = + filename + |> read + |> dump "RawLambda" RawLambda.show_term + |> Cook.cook_term + |> dump "Lambda" Lambda.show_term + |> CPS.cps_term + |> dump "Tail" Tail.show_term + |> Defun.defun_term + |> dump "Top" Top.show_program + |> Finish.finish_program + |> dump "C" C.show_program + |> output + +(* -------------------------------------------------------------------------- *) + +(* The main program. *) + +let () = + List.iter process filenames diff --git a/projet/src/Makefile b/projet/src/Makefile new file mode 100644 index 0000000..73f39e1 --- /dev/null +++ b/projet/src/Makefile @@ -0,0 +1,28 @@ +SHELL := bash +TARGET := Main.native +JOUJOU := joujou +DIRS := kremlin,alphalib +OCAMLBUILD :=\ + ocamlbuild \ + -classic-display \ + -j 4 \ + -use-ocamlfind \ + -use-menhir \ + -menhir "menhir -lg 1 -la 1 --explain" \ + -Is $(DIRS) \ + +.PHONY: all test clean + +all: + @ $(OCAMLBUILD) -quiet $(TARGET) + @ ln -sf $(TARGET) $(JOUJOU) + +test: all + @ make -C test test + +clean: + rm -f *~ + rm -f tests/*.c tests/*.out + $(OCAMLBUILD) -clean + rm -f $(TARGET) $(JOUJOU) + $(MAKE) -C test clean diff --git a/projet/src/Parser.mly b/projet/src/Parser.mly new file mode 100644 index 0000000..4f032b2 --- /dev/null +++ b/projet/src/Parser.mly @@ -0,0 +1,155 @@ +%token IDENT +%token INTLITERAL +%token FUN IN LET PRINT REC +%token ARROW EQ LPAREN RPAREN +%token MULOP ADDOP +%token EOF + +%start entry + +%{ + +open RawLambda + +%} + +%% + +(* -------------------------------------------------------------------------- *) + +(* A toplevel phrase is just a term. *) + +entry: + t = any_term EOF + { t } + +(* -------------------------------------------------------------------------- *) + +(* The syntax of terms is stratified as follows: + + atomic_term -- unambiguously delimited terms + application_term -- n-ary applications of atomic terms + multiplicative_term -- built using multiplication & division + additive_term -- built using addition & subtraction + any_term -- everything + + A [match/with/end] construct is terminated with an [end] keyword, as in Coq, + so it is an atomic term. *) + +atomic_term_: +| LPAREN t = any_term RPAREN + { t.value } +| x = IDENT + { Var x } +| i = INTLITERAL + { Lit i } + +application_term_: +| t = atomic_term_ + { t } +| t1 = placed(application_term_) t2 = placed(atomic_term_) + { App (t1, t2) } +| PRINT t2 = placed(atomic_term_) + { Print t2 } + +%inline multiplicative_term_: + t = left_associative_level(application_term_, MULOP, mkbinop) + { t } + +%inline additive_term_: + t = left_associative_level(multiplicative_term_, ADDOP, mkbinop) + { t } + +any_term_: +| t = additive_term_ + { t } +| FUN x = IDENT ARROW t = any_term + { Lam (x, t) } +| LET mode = recursive x = IDENT EQ t1 = any_term IN t2 = any_term + { Let (mode, x, t1, t2) } + +%inline any_term: + t = placed(any_term_) + { t } + +(* -------------------------------------------------------------------------- *) + +(* An infix-left-associative-operator level in a hierarchy of arithmetic + expressions. *) + +(* [basis] is the next lower level in the hierarchy. + [op] is the category of binary operators. + [action] is a ternary sequencing construct. *) + +left_associative_level(basis, op, action): +| t = basis +| t = action( + left_associative_level(basis, op, action), + op, + basis + ) + { t } + +(* -------------------------------------------------------------------------- *) + +(* A ternary sequence whose semantic action builds a [BinOp] node. *) + +%inline mkbinop(term1, op, term2): + t1 = placed(term1) op = op t2 = placed(term2) + { BinOp (t1, op, t2) } + +(* -------------------------------------------------------------------------- *) + +(* A [let] construct carries an optional [rec] annotation. *) + +recursive: +| REC { Recursive } +| { NonRecursive } + +(* -------------------------------------------------------------------------- *) + +(* A term is annotated with its start and end positions, for use in error + messages. *) + +%inline placed(X): + x = X + { { place = ($startpos, $endpos); value = x } } + +(* -------------------------------------------------------------------------- *) + +(* In a right-flexible list, the last delimiter is optional, i.e., [delim] can + be viewed as a terminator or a separator, as desired. *) + +(* There are several ways of expressing this. One could say it is either a + separated list or a terminated list; this works if one uses right recursive + lists. Or, one could say that it is a separated list followed with an + optional delimiter; this works if one uses a left-recursive list. The + following formulation is direct and seems most natural. It should lead to + the smallest possible automaton. *) + +right_flexible_list(delim, X): +| (* nothing *) + { [] } +| x = X + { [x] } +| x = X delim xs = right_flexible_list(delim, X) + { x :: xs } + +(* In a left-flexible list, the first delimiter is optional, i.e., [delim] can + be viewed as an opening or as a separator, as desired. *) + +(* Again, there are several ways of expressing this, and again, I suppose the + following formulation is simplest. It is the mirror image of the above + definition, so it is naturally left-recursive, this time. *) + +reverse_left_flexible_list(delim, X): +| (* nothing *) + { [] } +| x = X + { [x] } +| xs = reverse_left_flexible_list(delim, X) delim x = X + { x :: xs } + +%inline left_flexible_list(delim, X): + xs = reverse_left_flexible_list(delim, X) + { List.rev xs } diff --git a/projet/src/RawLambda.ml b/projet/src/RawLambda.ml new file mode 100644 index 0000000..51f33e1 --- /dev/null +++ b/projet/src/RawLambda.ml @@ -0,0 +1,55 @@ +(* Variables are strings. *) + +type variable = + string + +(* Every [let] binding is marked recursive or nonrecursive. *) + +and recursive = + | Recursive + | NonRecursive + +(* The four standard integer arithmetic operations are supported. *) + +and binop = + | OpAdd + | OpSub + | OpMul + | OpDiv + +(* This language is the untyped lambda-calculus, extended with possibly + recursive [let] bindings, integer literals (that is, constants), integer + arithmetic operations, and the primitive operation [print], which prints an + integer value and returns it. *) + +and term_ = + | Var of variable + | Lam of variable * term + | App of term * term + | Lit of int + | BinOp of term * binop * term + | Print of term + | Let of recursive * variable * term * term + +(* Every abstract syntax tree node of type [term] is annotated with a place, + that is, a position in the source code. This allows us to produce a good + error message when a problem is detected. *) + +and term = + term_ placed + +(* A value of type ['a placed] can be thought of as a value of type ['a] + decorated with a place. *) + +and 'a placed = { + place: Error.place; + value: 'a + } + +(* The following annotation requests the automatic generation of a [show_] + function for each of the types defined above. For instance, the function + [show_term], of type [term -> string], converts a term to a string. These + functions can be useful during debugging. Running with [--debug] causes + every intermediate abstract syntax tree to be displayed in this way. *) + +[@@deriving show { with_path = false }] diff --git a/projet/src/Tail.ml b/projet/src/Tail.ml new file mode 100644 index 0000000..44b8b6d --- /dev/null +++ b/projet/src/Tail.ml @@ -0,0 +1,132 @@ +(* This intermediate language describes the result of the CPS transformation. + It is a lambda-calculus where the ordering of computations is explicit and + where every function call is a tail call. + + The following syntactic categories are distinguished: + + 1. "Values" include variables, integer literals, and applications of the + primitive integer operations to values. Instead of "values", they could + also be referred to as "pure expressions". They are expressions whose + evaluation terminates and has no side effect, not even memory + allocation. + + 2. "Blocks" include lambda-abstractions. Even though lambda-abstractions + are traditionally considered values, here, they are viewed as + expressions whose evaluation has a side effect, namely, the allocation + of a memory block. + + 3. "Terms" are expressions with side effects. Terms always appear in tail + position: an examination of the syntax of terms shows that a term can be + viewed as a sequence of [LetVal], [LetBlo] and [Print] instructions, + terminated with either [Exit] or [TailCall]. This implies, in + particular, that every call is a tail call. + + In contrast with the surface language, where every lambda-abstraction has + arity 1, in this calculus, lambda-abstractions of arbitrary arity are + supported. A lambda-abstraction [Lam] carries a list of formal arguments + and a function call [TailCall] carries a list of actual arguments. Partial + applications or over-applications are not supported: it is the programmer's + responsibility to ensure that every function call provides exactly as many + arguments as expected by the called function. *) + +type variable = + Atom.atom + +and self = Lambda.self = + | Self of variable + | NoSelf + +and binop = Lambda.binop = + | OpAdd + | OpSub + | OpMul + | OpDiv + +and value = + | VVar of variable + | VLit of int + | VBinOp of value * binop * value + +and block = + | Lam of self * variable list * term + +(* Terms include the following constructs: + + - The primitive operation [Exit] stops the program. + + - The tail call [TailCall (v, vs)] transfers control to the function [v] + with actual arguments [vs]. (The value [v] should be a function and its + arity should be the length of [vs].) + + - The term [Print (v, t)] prints the value [v], then executes the term [t]. + (The value [v] should be a primitive integer value.) + + - The term [LetVal (x, v, t)] binds the variable [x] to the value [v], then + executes the term [t]. + + - The term [LetBlo (x, b, t)] allocates the memory block [b] and binds the + variable [x] to its address, then executes the term [t]. *) + +and term = + | Exit + | TailCall of value * value list + | Print of value * term + | LetVal of variable * value * term + | LetBlo of variable * block * term + +[@@deriving show { with_path = false }] + +(* -------------------------------------------------------------------------- *) + +(* Constructor functions. *) + +let vvar x = + VVar x + +let vvars xs = + List.map vvar xs + +(* -------------------------------------------------------------------------- *) + +(* Computing the free variables of a value, block, or term. *) + +open Atom.Set + +let rec fv_value (v : value) = + match v with + | VVar x -> + singleton x + | VLit _ -> + empty + | VBinOp (v1, _, v2) -> + union (fv_value v1) (fv_value v2) + +and fv_values (vs : value list) = + union_many fv_value vs + +and fv_lambda (xs : variable list) (t : term) = + diff (fv_term t) (of_list xs) + +and fv_block (b : block) = + match b with + | Lam (NoSelf, xs, t) -> + fv_lambda xs t + | Lam (Self f, xs, t) -> + remove f (fv_lambda xs t) + +and fv_term (t : term) = + match t with + | Exit -> + empty + | TailCall (v, vs) -> + fv_values (v :: vs) + | Print (v1, t2) -> + union (fv_value v1) (fv_term t2) + | LetVal (x, v1, t2) -> + union + (fv_value v1) + (remove x (fv_term t2)) + | LetBlo (x, b1, t2) -> + union + (fv_block b1) + (remove x (fv_term t2)) diff --git a/projet/src/Top.ml b/projet/src/Top.ml new file mode 100644 index 0000000..edbb0fb --- /dev/null +++ b/projet/src/Top.ml @@ -0,0 +1,94 @@ +(* This intermediate language describes the result of defunctionalization. + It retains the key features of the previous calculus, [Tail], in that + the ordering of computations is explicit and every function call is a + tail call. Furthermore, lambda-abstractions disappear. A memory block + [Con] now contains an integer tag followed with a number of fields, + which hold values. A [switch] construct appears, which allows testing + the tag of a memory block. A number of (closed, mutually recursive) + functions can be defined at the top level. *) + +type tag = + int + +and variable = + Atom.atom + +and binop = Tail.binop = + | OpAdd + | OpSub + | OpMul + | OpDiv + +and value = Tail.value = + | VVar of variable + | VLit of int + | VBinOp of value * binop * value + +(* A block contains an integer tag, followed with a number of fields. *) + +and block = + | Con of tag * value list + +(* The construct [Swi (v, branches)] reads the integer tag stored in the + memory block at address [v] and performs a case analysis on this tag, + transferring control to the appropriate branch. (The value [v] should be a + pointer to a memory block.) *) + +and term = + | Exit + | TailCall of variable * value list + | Print of value * term + | LetVal of variable * value * term + | LetBlo of variable * block * term + | Swi of value * branch list + +(* A branch [tag xs -> t] is labeled with an integer tag [tag], and is + executed if the memory block carries this tag. The variables [xs] are + then bounds to the fields of the memory block. (The length of the list + [xs] should be the number of fields of the memory block.) *) + +and branch = + | Branch of tag * variable list * term + +(* A toplevel function declaration mentions the function's name, formal + parameters, and body. *) + +and function_declaration = + | Fun of variable * variable list * term + +(* A complete program consits of a set of toplevel function declarations + and a term (the "main program"). The functions are considered mutually + recursive: every function may refer to every function. *) + +and program = + | Prog of function_declaration list * term + +[@@deriving show { with_path = false }] + +(* -------------------------------------------------------------------------- *) + +(* Constructor functions. *) + +let vvar = + Tail.vvar + +let vvars = + Tail.vvars + +(* [let x_1 = v_1 in ... let x_n = v_n in t] *) + +let rec sequential_let (xs : variable list) (vs : value list) (t : term) = + match xs, vs with + | [], [] -> + t + | x :: xs, v :: vs -> + LetVal (x, v, sequential_let xs vs t) + | _ -> + assert false + +(* [let x_1 = v_1 and ... x_n = v_n in t] *) + +let parallel_let (xs : variable list) (vs : value list) (t : term) = + assert (List.length xs = List.length vs); + assert (Atom.Set.disjoint (Atom.Set.of_list xs) (Tail.fv_values vs)); + sequential_let xs vs t diff --git a/projet/src/_tags b/projet/src/_tags new file mode 100644 index 0000000..bf1bddf --- /dev/null +++ b/projet/src/_tags @@ -0,0 +1,8 @@ +true: \ + debug, \ + strict_sequence, \ + warn(A-3-4-30-44-42-45-50), \ + package(unix), \ + package(process), \ + package(pprint), \ + package(ppx_deriving.std) diff --git a/projet/src/alphalib/Atom.ml b/projet/src/alphalib/Atom.ml new file mode 100644 index 0000000..6502410 --- /dev/null +++ b/projet/src/alphalib/Atom.ml @@ -0,0 +1,215 @@ +(* -------------------------------------------------------------------------- *) + +(* We impose maximal sharing on strings so as to reduce the total amount of + space that they occupy. This is done using a weak hash set. *) + +module StringStorage = + Weak.Make(struct + type t = string + let equal (s1 : string) (s2 : string) = (s1 = s2) + let hash = Hashtbl.hash + end) + +let share : string -> string = + StringStorage.merge (StringStorage.create 128) + +(* -------------------------------------------------------------------------- *) + +(* Removing any trailing digits in a string. *) + +let is_digit c = + Char.code '0' <= Char.code c && Char.code c <= Char.code '9' + +let remove_trailing_digits (s : string) : string = + let n = ref (String.length s) in + while !n > 0 && is_digit s.[!n-1] do n := !n-1 done; + (* We assume that there is at least one non-digit character in the string. *) + assert (!n > 0); + String.sub s 0 !n + +(* -------------------------------------------------------------------------- *) + +(* An atom is implemented as a pair of an integer identity and a string that + serves as a printing hint. *) + +(* We maintain the invariant that a hint is nonempty and does not end in a + digit. This allows us to later produce unique identifiers, without risk of + collisions, by concatenating a hint and a unique number. *) + +(* To preserve space, hints are maximally shared. This is not essential for + correctness, though. *) + +type atom = { identity: int; hint: string } + +and t = atom + [@@deriving show { with_path = false }] + +let identity a = + a.identity + +let hint a = + a.hint + +(* -------------------------------------------------------------------------- *) + +(* A global integer counter holds the next available identity. *) + +let counter = + ref 0 + +let allocate () = + let number = !counter in + counter := number + 1; + assert (number >= 0); + number + +(* [fresh hint] produces a fresh atom. *) + +(* The argument [hint] must not be a string of digits. *) + +let fresh hint = + let identity = allocate() + and hint = share (remove_trailing_digits hint) in + { identity; hint } + +(* [copy a] returns a fresh atom modeled after the atom [a]. *) + +let copy a = + fresh a.hint + +(* -------------------------------------------------------------------------- *) + +(* Comparison of atoms. *) + +let equal a b = + a.identity = b.identity + +let compare a b = + (* Identities are always positive numbers (see [allocate] above) + so I believe overflow is impossible here. *) + a.identity - b.identity + +let hash a = + Hashtbl.hash a.identity + +(* -------------------------------------------------------------------------- *) + +(* A scratch buffer for printing. *) + +let scratch = + Buffer.create 1024 + +(* [print_separated_sequence] prints a sequence of elements into the [scratch] + buffer. The sequence is given by the higher-order iterator [iter], applied + to the collection [xs]. The separator is the string [sep]. Each element is + transformed to a string by the function [show]. *) + +let print_separated_sequence show sep iter xs : unit = + let first = ref true in + iter (fun x -> + if !first then begin + Buffer.add_string scratch (show x); + first := false + end + else begin + Buffer.add_string scratch sep; + Buffer.add_string scratch (show x) + end + ) xs + +(* -------------------------------------------------------------------------- *) + +(* Sets and maps. *) + +module Order = struct + type t = atom + let compare = compare +end + +module Set = struct + + include Set.Make(Order) + + (* A disjointness test. *) + + let disjoint xs ys = + is_empty (inter xs ys) + + (* Iterated union. *) + + let union_many (f : 'a -> t) (xs : 'a list) : t = + List.fold_left (fun accu x -> + union accu (f x) + ) empty xs + + (* Disjoint union. *) + + exception NonDisjointUnion of atom + + let disjoint_union xs ys = + match choose (inter xs ys) with + | exception Not_found -> + (* The intersection of [xs] and [ys] is empty. Return their union. *) + union xs ys + | x -> + (* The intersection contains [x]. Raise an exception. *) + raise (NonDisjointUnion x) + + let handle_NonDisjointUnion f x = + try + f x; true + with NonDisjointUnion a -> + Printf.eprintf "NonDisjointUnion: %s\n%!" (show a); + false + + (* Sets of atoms form a monoid under union. *) + + class ['z] union_monoid = object + method zero: 'z = empty + method plus: 'z -> 'z -> 'z = union + end + + (* Sets of atoms form a monoid under disjoint union. *) + + class ['z] disjoint_union_monoid = object + method zero: 'z = empty + method plus: 'z -> 'z -> 'z = disjoint_union + end + + (* These printing functions should be used for debugging purposes only. *) + + let print_to_scratch xs = + Buffer.clear scratch; + Buffer.add_string scratch "{"; + print_separated_sequence show ", " iter xs; + Buffer.add_string scratch "}" + + let show xs = + print_to_scratch xs; + let result = Buffer.contents scratch in + Buffer.reset scratch; + result + + let print oc xs = + print_to_scratch xs; + Buffer.output_buffer oc scratch; + Buffer.reset scratch + +end + +module Map = struct + + include Map.Make(Order) + + (* This is O(nlog n), whereas in principle O(n) is possible. + The abstraction barrier in OCaml's [Set] module hinders us. *) + let domain m = + fold (fun a _ accu -> Set.add a accu) m Set.empty + + let codomain f m = + fold (fun _ v accu -> Set.union (f v) accu) m Set.empty + +end + +type renaming = + atom Map.t diff --git a/projet/src/alphalib/Atom.mli b/projet/src/alphalib/Atom.mli new file mode 100644 index 0000000..6ec6206 --- /dev/null +++ b/projet/src/alphalib/Atom.mli @@ -0,0 +1,71 @@ +(* TEMPORARY document *) + +(* An atom is a pair of a unique integer identity and a (not necessarily + unique) string. *) + +type atom + +and t = atom + [@@deriving show] + +val identity: atom -> int +val hint: atom -> string + +(* Producing fresh atoms. *) + +val fresh: string -> atom +val copy: atom -> atom + +(* Comparison of atoms. *) + +val equal: atom -> atom -> bool +val compare: atom -> atom -> int +val hash: atom -> int + +(* Sets. *) + +module Set : sig + include Set.S with type elt = atom + + val disjoint: t -> t -> bool + + val union_many: ('a -> t) -> 'a list -> t + + (* Disjoint union. *) + + exception NonDisjointUnion of atom + val disjoint_union: t -> t -> t + + val handle_NonDisjointUnion: ('a -> unit) -> 'a -> bool + + (* Sets of atoms form monoids under union and disjoint union. *) + + class ['z] union_monoid : object + constraint 'z = t + method zero: 'z + method plus: 'z -> 'z -> 'z + end + + class ['z] disjoint_union_monoid : object + constraint 'z = t + method zero: 'z + method plus: 'z -> 'z -> 'z + end + + val show: t -> string + val print: out_channel -> t -> unit +end + +(* Maps. *) + +module Map : sig + + include Map.S with type key = atom + + val domain: 'a t -> Set.t + val codomain: ('a -> Set.t) -> 'a t -> Set.t + +end + +type renaming = + atom Map.t diff --git a/projet/src/kremlin/C.ml b/projet/src/kremlin/C.ml new file mode 100644 index 0000000..db29569 --- /dev/null +++ b/projet/src/kremlin/C.ml @@ -0,0 +1,126 @@ + + +module K = Constant +open Common + +(* This pretty-printer based on: http:/ /en.cppreference.com/w/c/language/declarations + * Many cases are omitted from this bare-bones C grammar; hopefully, to be extended. *) +type type_spec = + | Int of Constant.width + | Void + | Named of ident + | Struct of ident option * declaration list option + (** Note: the option allows for zero-sized structs (GCC's C, C++) but as + * of 2017/05/14 we never generate these. *) + | Union of ident option * declaration list + | Enum of ident option * ident list + (** not encoding all the invariants here *) + [@@deriving show { with_path = false }] + +and storage_spec = + | Typedef + | Extern + | Static + +and declarator_and_init = + declarator * init option + +and declarator_and_inits = + declarator_and_init list + +and declarator = + | Ident of ident + | Pointer of declarator + | Array of declarator * expr + | Function of calling_convention option * declarator * params + +and expr = + | Op1 of K.op * expr + | Op2 of K.op * expr * expr + | Index of expr * expr + | Deref of expr + | Address of expr + | Member of expr * expr + | MemberP of expr * expr + | Assign of expr * expr + (** not covering *=, +=, etc. *) + | Call of expr * expr list + | Name of ident + | Cast of type_name * expr + | Literal of string + | Constant of K.t + | Bool of bool + | Sizeof of expr + | SizeofType of type_spec + | CompoundLiteral of type_name * init list + | MemberAccess of expr * ident + | MemberAccessPointer of expr * ident + | InlineComment of string * expr * string + | Type of type_name + (** note: these two not in the C grammar *) + +(** this is a WILD approximation of the notion of "type name" in C _and_ a hack + * because there's the invariant that the ident found at the bottom of the + * [declarator] is irrelevant... *) +and type_name = + type_spec * declarator + +and params = + (* No support for old syntax, e.g. K&R, or [void f(void)]. *) + param list + +and param = + (** Also approximate. *) + type_spec * declarator + +and declaration = + type_spec * storage_spec option * declarator_and_inits + +and ident = + string + +and init = + | InitExpr of expr + | Designated of designator * init + | Initializer of init list + +and designator = + | Dot of ident + | Bracket of int + +(** Note: according to http:/ /en.cppreference.com/w/c/language/statements, + * declarations can only be part of a compound statement... we do not enforce + * this invariant via the type [stmt], but rather check it at runtime (see + * [mk_compound_if]), as the risk of messing things up, naturally. *) +type stmt = + | Compound of stmt list + | DeclStmt of declaration + | Expr of expr + | If of expr * stmt + | IfElse of expr * stmt * stmt + | While of expr * stmt + | For of declaration_or_expr * expr * expr * stmt + | Return of expr option + | Switch of expr * (expr * stmt) list * stmt + (** the last component is the default statement *) + | Break + | Comment of string + (** note: this is not in the C grammar *) + +and program = + declaration_or_function list + +and comment = + string + +and declaration_or_function = + | Decl of comment list * declaration + | Function of comment list * bool * declaration * stmt + (** [stmt] _must_ be a compound statement; boolean is inline *) + +and declaration_or_expr = [ + | `Decl of declaration + | `Expr of expr + | `Skip +] +[@@deriving show { with_path = false }] diff --git a/projet/src/kremlin/Common.ml b/projet/src/kremlin/Common.ml new file mode 100644 index 0000000..77638d7 --- /dev/null +++ b/projet/src/kremlin/Common.ml @@ -0,0 +1,19 @@ +type calling_convention = + | StdCall + | CDecl + | FastCall + [@@deriving show { with_path = false }] + +type lifetime = + | Eternal + | Stack + [@@deriving show { with_path = false }] + +type flag = + | Private + | NoExtract + | CInline + | Substitute + | GcType + | Comment of string + [@@deriving show { with_path = false }] diff --git a/projet/src/kremlin/Constant.ml b/projet/src/kremlin/Constant.ml new file mode 100644 index 0000000..5533d38 --- /dev/null +++ b/projet/src/kremlin/Constant.ml @@ -0,0 +1,66 @@ +(** Machine integers. Don't repeat the same thing everywhere. *) + +type t = width * string + [@@deriving show] + +and width = + | UInt8 | UInt16 | UInt32 | UInt64 | Int8 | Int16 | Int32 | Int64 + | Bool + | CInt (** Checked signed integers. *) + +let bytes_of_width (w: width) = + match w with + | UInt8 -> 1 + | UInt16 -> 2 + | UInt32 -> 4 + | UInt64 -> 8 + | Int8 -> 1 + | Int16 -> 2 + | Int32 -> 4 + | Int64 -> 8 + | CInt -> 4 + | _ -> invalid_arg "bytes_of_width" + +type op = + (* Arithmetic operations *) + | Add | AddW | Sub | SubW | Div | DivW | Mult | MultW | Mod + (* Bitwise operations *) + | BOr | BAnd | BXor | BShiftL | BShiftR | BNot + (* Arithmetic comparisons / boolean comparisons *) + | Eq | Neq | Lt | Lte | Gt | Gte + (* Boolean operations *) + | And | Or | Xor | Not + (* Effectful operations *) + | Assign | PreIncr | PreDecr | PostIncr | PostDecr + (* Misc *) + | Comma + [@@deriving show { with_path = false }] + +let unsigned_of_signed = function + | Int8 -> UInt8 + | Int16 -> UInt16 + | Int32 -> UInt32 + | Int64 -> UInt64 + | CInt | UInt8 | UInt16 | UInt32 | UInt64 | Bool -> raise (Invalid_argument "unsigned_of_signed") + +let is_signed = function + | Int8 | Int16 | Int32 | Int64 | CInt -> true + | UInt8 | UInt16 | UInt32 | UInt64 -> false + | Bool -> raise (Invalid_argument "is_signed") + +let is_unsigned w = not (is_signed w) + +let without_wrap = function + | AddW -> Add + | SubW -> Sub + | MultW -> Mult + | DivW -> Div + | _ -> raise (Invalid_argument "without_wrap") + +let uint8_of_int i = + assert (i < (1 lsl 8) && i >= 0); + UInt8, string_of_int i + +let uint32_of_int i = + assert (i < (1 lsl 32) && i >= 0); + UInt32, string_of_int i diff --git a/projet/src/kremlin/Options.ml b/projet/src/kremlin/Options.ml new file mode 100644 index 0000000..fd6bfbf --- /dev/null +++ b/projet/src/kremlin/Options.ml @@ -0,0 +1,7 @@ +(* By default, the C printer inserts a minimal amount of parentheses. + However, this can trigger GCC and Clang's -Wparentheses warning, + so there is an option to produce more parentheses than strictly + necessary. *) + +let parentheses = + ref false diff --git a/projet/src/kremlin/PrintC.ml b/projet/src/kremlin/PrintC.ml new file mode 100644 index 0000000..34583ee --- /dev/null +++ b/projet/src/kremlin/PrintC.ml @@ -0,0 +1,354 @@ +(** Pretty-printer that conforms with C syntax. Also defines the grammar of + * concrete C syntax, as opposed to our idealistic, well-formed C*. *) + +open PPrint +open PrintCommon +open C + +(* Pretty-printing of actual C syntax *****************************************) + +let p_storage_spec = function + | Typedef -> string "typedef" + | Extern -> string "extern" + | Static -> string "static" + +let rec p_type_spec = function + | Int w -> print_width w ^^ string "_t" + | Void -> string "void" + | Named s -> string s + | Union (name, decls) -> + group (string "union" ^/^ + (match name with Some name -> string name ^^ break1 | None -> empty)) ^^ + braces_with_nesting (separate_map hardline (fun p -> group (p_declaration p ^^ semi)) decls) + | Struct (name, decls) -> + group (string "struct" ^/^ + (match name with Some name -> string name | None -> empty)) ^^ + (match decls with + | Some decls -> + break1 ^^ braces_with_nesting (separate_map hardline (fun p -> group (p_declaration p ^^ semi)) decls) + | None -> + empty) + | Enum (name, tags) -> + group (string "enum" ^/^ + (match name with Some name -> string name ^^ break1 | None -> empty)) ^^ + braces_with_nesting (separate_map (comma ^^ break1) string tags) + + +and p_type_declarator d = + let rec p_noptr = function + | Ident n -> + string n + | Array (d, s) -> + p_noptr d ^^ lbracket ^^ p_expr s ^^ rbracket + | Function (cc, d, params) -> + let cc = match cc with Some cc -> print_cc cc ^^ break1 | None -> empty in + group (cc ^^ p_noptr d ^^ parens_with_nesting (separate_map (comma ^^ break 1) (fun (spec, decl) -> + group (p_type_spec spec ^/^ p_any decl) + ) params)) + | d -> + lparen ^^ p_any d ^^ rparen + and p_any = function + | Pointer d -> + star ^^ p_any d + | d -> + p_noptr d + in + p_any d + +and p_type_name (spec, decl) = + match decl with + | Ident "" -> + p_type_spec spec + | _ -> + p_type_spec spec ^^ space ^^ p_type_declarator decl + +(* http:/ /en.cppreference.com/w/c/language/operator_precedence *) +and prec_of_op2 op = + let open Constant in + match op with + | AddW | SubW | MultW | DivW -> failwith "[prec_of_op]: should've been desugared" + | Add -> 4, 4, 4 + | Sub -> 4, 4, 3 + | Div -> 3, 3, 2 + | Mult -> 3, 3, 3 + | Mod -> 3, 3, 2 + | BOr -> 10, 10, 10 + | BAnd -> + 8, 8, 8 + | BXor | Xor -> 9, 9, 9 + | BShiftL | BShiftR -> + 5, 5, 4 + | Eq | Neq -> 7, 7, 7 + | Lt | Lte | Gt | Gte -> 6, 6, 5 + | And -> 11, 11, 11 + | Or -> 12, 12, 12 + | Assign -> 14, 13, 14 + | Comma -> 15, 15, 14 + | PreIncr | PostIncr | PreDecr | PostDecr | Not | BNot -> raise (Invalid_argument "prec_of_op2") + +and prec_of_op1 op = + let open Constant in + match op with + | PreDecr | PreIncr | Not | BNot -> 2 + | PostDecr | PostIncr -> 1 + | _ -> raise (Invalid_argument "prec_of_op1") + +and is_prefix op = + let open Constant in + match op with + | PreDecr | PreIncr | Not | BNot -> true + | PostDecr | PostIncr -> false + | _ -> raise (Invalid_argument "is_prefix") + +(* The precedence level [curr] is the maximum precedence the current node should + * have. If it doesn't, then it should be parenthesized. Lower numbers bind + * tighter. *) +and paren_if curr mine doc = + if curr < mine then + group (lparen ^^ doc ^^ rparen) + else + doc + +(* [e] is an operand of [op]; is this likely to trigger GCC's -Wparentheses? If + * so, downgrade the current precedence to 0 to force parenthses. *) +and defeat_Wparentheses op e prec = + let open Constant in + if not !Options.parentheses then + prec + else match op, e with + | (BShiftL | BShiftR | BXor | BOr | BAnd), Op2 ((Add | Sub | BOr | BXor), _, _) -> + 0 + | _ -> + prec + +and p_expr' curr = function + | Op1 (op, e1) -> + let mine = prec_of_op1 op in + let e1 = p_expr' mine e1 in + paren_if curr mine (if is_prefix op then print_op op ^^ e1 else e1 ^^ print_op op) + | Op2 (op, e1, e2) -> + let mine, left, right = prec_of_op2 op in + let left = defeat_Wparentheses op e1 left in + let right = defeat_Wparentheses op e2 right in + let e1 = p_expr' left e1 in + let e2 = p_expr' right e2 in + paren_if curr mine (e1 ^/^ print_op op ^^ jump e2) + | Index (e1, e2) -> + let mine, left, right = 1, 1, 15 in + let e1 = p_expr' left e1 in + let e2 = p_expr' right e2 in + paren_if curr mine (e1 ^^ lbracket ^^ e2 ^^ rbracket) + | Assign (e1, e2) -> + let mine, left, right = 14, 13, 14 in + let e1 = p_expr' left e1 in + let e2 = p_expr' right e2 in + paren_if curr mine (group (e1 ^/^ equals) ^^ jump e2) + | Call (e, es) -> + let mine, left, arg = 1, 1, 14 in + let e = p_expr' left e in + let es = nest 2 (separate_map (comma ^^ break 1) (fun e -> group (p_expr' arg e)) es) in + paren_if curr mine (e ^^ lparen ^^ es ^^ rparen) + | Literal s -> + dquote ^^ string s ^^ dquote + | Constant (w, s) -> + string s ^^ if K.is_unsigned w then string "U" else empty + | Name s -> + string s + | Cast (t, e) -> + let mine, right = 2, 2 in + let e = group (p_expr' right e) in + let t = p_type_name t in + paren_if curr mine (lparen ^^ t ^^ rparen ^^ e) + | Type t -> + p_type_name t + | Sizeof e -> + let mine, right = 2, 2 in + let e = p_expr' right e in + paren_if curr mine (string "sizeof" ^^ space ^^ e) + | SizeofType t -> + string "sizeof" ^^ group (lparen ^^ p_type_spec t ^^ rparen) + | Address e -> + let mine, right = 2, 2 in + let e = p_expr' right e in + paren_if curr mine (ampersand ^^ e) + | Deref e -> + let mine, right = 2, 2 in + let e = p_expr' right e in + paren_if curr mine (star ^^ e) + | Member _ | MemberP _ -> + failwith "[p_expr']: not implemented" + | Bool b -> + string (string_of_bool b) + | CompoundLiteral (t, init) -> + (* NOTE: always parenthesize compound literal no matter what, because GCC + * parses an application of a function to a compound literal as an n-ary + * application. *) + parens_with_nesting ( + lparen ^^ p_type_name t ^^ rparen ^^ + braces_with_nesting (separate_map (comma ^^ break1) p_init init) + ) + | MemberAccess (expr, member) -> + p_expr' 1 expr ^^ dot ^^ string member + | MemberAccessPointer (expr, member) -> + p_expr' 1 expr ^^ string "->" ^^ string member + | InlineComment (s, e, s') -> + surround 2 1 (p_comment s) (p_expr' curr e) (p_comment s') + +and p_comment s = + (* TODO: escape *) + string "/* " ^^ nest 2 (flow space (words s)) ^^ string " */" + + +and p_expr e = p_expr' 15 e + +and p_init (i: init) = + match i with + | Designated (designator, i) -> + group (p_designator designator ^^ space ^^ equals ^^ space ^^ p_init i) + | InitExpr e -> + p_expr' 14 e + | Initializer inits -> + let inits = + if List.length inits > 4 then + flow (comma ^^ break1) (List.map p_init inits) + else + separate_map (comma ^^ break1) p_init inits + in + braces_with_nesting inits + +and p_designator = function + | Dot ident -> + dot ^^ string ident + | Bracket i -> + lbracket ^^ int i ^^ rbracket + +and p_decl_and_init (decl, init) = + group (p_type_declarator decl ^^ match init with + | Some init -> + space ^^ equals ^^ jump (p_init init) + | None -> + empty) + +and p_declaration (spec, stor, decl_and_inits) = + let stor = match stor with Some stor -> p_storage_spec stor ^^ space | None -> empty in + stor ^^ group (p_type_spec spec) ^/^ + separate_map (comma ^^ break 1) p_decl_and_init decl_and_inits + +(* This is abusing the definition of a compound statement to ensure it is printed with braces. *) +let nest_if f stmt = + match stmt with + | Compound _ -> + hardline ^^ f stmt + | _ -> + nest 2 (hardline ^^ f stmt) + +(* A note on the classic dangling else problem. Remember that this is how things + * are parsed (note the indentation): + * + * if (foo) + * if (bar) + * ... + * else + * ... + * + * And remember that this needs braces: + * + * if (foo) { + * if (bar) + * ... + * } else + * ... + * + * [protect_solo_if] adds braces to the latter case. However, GCC, unless + * -Wnoparentheses is given, will produce a warning for the former case. + * [protect_ite_if_needed] adds braces to the former case, when the user has + * requested the extra, unnecessary parentheses needed to silence -Wparentheses. + * *) +let protect_solo_if s = + match s with + | If _ -> Compound [ s ] + | _ -> s + +let protect_ite_if_needed s = + match s with + | IfElse _ when !Options.parentheses -> Compound [ s ] + | _ -> s + +let p_or p x = + match x with + | Some x -> p x + | None -> empty + +let rec p_stmt (s: stmt) = + (* [p_stmt] is responsible for appending [semi] and calling [group]! *) + match s with + | Compound stmts -> + lbrace ^^ nest 2 (hardline ^^ separate_map hardline p_stmt stmts) ^^ + hardline ^^ rbrace + | Expr expr -> + group (p_expr expr ^^ semi) + | Comment s -> + group (string "/*" ^/^ separate break1 (words s) ^/^ string "*/") + | For (decl, e2, e3, stmt) -> + let init = match decl with + | `Decl decl -> p_declaration decl + | `Expr expr -> p_expr expr + | `Skip -> empty + in + group (string "for" ^/^ lparen ^^ nest 2 ( + init ^^ semi ^^ break1 ^^ + p_expr e2 ^^ semi ^^ break1 ^^ + p_expr e3 + ) ^^ rparen) ^^ nest_if p_stmt stmt + | If (e, stmt) -> + group (string "if" ^/^ lparen ^^ p_expr e ^^ rparen) ^^ + nest_if p_stmt (protect_ite_if_needed stmt) + | IfElse (e, s1, s2) -> + group (string "if" ^/^ lparen ^^ p_expr e ^^ rparen) ^^ + nest_if p_stmt (protect_solo_if s1) ^^ hardline ^^ + string "else" ^^ + (match s2 with + | If _ | IfElse _ -> + space ^^ p_stmt s2 + | _ -> + nest_if p_stmt s2) + | While (e, s) -> + group (string "while" ^/^ lparen ^^ p_expr e ^^ rparen) ^^ + nest_if p_stmt s + | Return None -> + group (string "return" ^^ semi) + | Return (Some e) -> + group (string "return" ^^ jump (p_expr e ^^ semi)) + | DeclStmt d -> + group (p_declaration d ^^ semi) + | Switch (e, branches, default) -> + group (string "switch" ^/^ lparen ^^ p_expr e ^^ rparen) ^/^ + braces_with_nesting ( + separate_map hardline (fun (e, s) -> + group (string "case" ^/^ p_expr e ^^ colon) ^^ nest 2 ( + hardline ^^ p_stmt s + ) + ) branches ^^ hardline ^^ + string "default:" ^^ nest 2 ( + hardline ^^ p_stmt default + ) + ) + | Break -> + string "break" ^^ semi + +let p_comments cs = + separate_map hardline (fun c -> string ("/*\n" ^ c ^ "\n*/")) cs ^^ + if List.length cs > 0 then hardline else empty + +let p_decl_or_function (df: declaration_or_function) = + match df with + | Decl (comments, d) -> + p_comments comments ^^ + group (p_declaration d ^^ semi) + | Function (comments, inline, d, stmt) -> + p_comments comments ^^ + let inline = if inline then string "inline" ^^ space else empty in + inline ^^ group (p_declaration d) ^/^ p_stmt stmt + +let print_files = + PrintCommon.print_files p_decl_or_function diff --git a/projet/src/kremlin/PrintCommon.ml b/projet/src/kremlin/PrintCommon.ml new file mode 100644 index 0000000..84e2b40 --- /dev/null +++ b/projet/src/kremlin/PrintCommon.ml @@ -0,0 +1,96 @@ +open PPrint +open Constant +open Common + +let break1 = break 1 + +let jump ?(indent=2) body = + jump indent 1 body + +let parens_with_nesting contents = + surround 2 0 lparen contents rparen + +let braces_with_nesting contents = + surround 2 1 lbrace contents rbrace + +let int i = string (string_of_int i) + +let print_width = function + | UInt8 -> string "uint8" + | UInt16 -> string "uint16" + | UInt32 -> string "uint32" + | UInt64 -> string "uint64" + | CInt -> string "krml_checked_int" + | Int8 -> string "int8" + | Int16 -> string "int16" + | Int32 -> string "int32" + | Int64 -> string "int64" + | Bool -> string "bool" + +let print_constant = function + | w, s -> string s ^^ print_width w + +let print_op = function + | Add -> string "+" + | AddW -> string "+w" + | Sub -> string "-" + | SubW -> string "-" + | Div -> string "/" + | DivW -> string "/w" + | Mult -> string "*" + | MultW -> string "*w" + | Mod -> string "%" + | BOr -> string "|" + | BAnd -> string "&" + | BXor -> string "^" + | BNot -> string "~" + | BShiftL -> string "<<" + | BShiftR -> string ">>" + | Eq -> string "==" + | Neq -> string "!=" + | Lt -> string "<" + | Lte -> string "<=" + | Gt -> string ">" + | Gte -> string ">=" + | And -> string "&&" + | Or -> string "||" + | Xor -> string "^" + | Not -> string "!" + | PostIncr | PreIncr -> string "++" + | PostDecr | PreDecr -> string "--" + | Assign -> string "=" + | Comma -> string "," + +let print_cc = function + | CDecl -> string "__cdecl" + | StdCall -> string "__stdcall" + | FastCall -> string "__fastcall" + +let print_lident (idents, ident) = + separate_map dot string (idents @ [ ident ]) + +let print_program p decls = + separate_map (hardline ^^ hardline) p decls + +let print_files print_decl files = + separate_map hardline (fun (f, p) -> + string (String.uppercase f) ^^ colon ^^ jump (print_program print_decl p) + ) files + +let printf_of_pprint f = + fun buf t -> + PPrint.ToBuffer.compact buf (f t) + +let printf_of_pprint_pretty f = + fun buf t -> + PPrint.ToBuffer.pretty 0.95 Utils.twidth buf (f t) + +let pdoc buf d = + PPrint.ToBuffer.compact buf d + +let english_join s = + match List.rev s with + | [] -> empty + | [ x ] -> x + | last :: first -> + group (separate (comma ^^ break1) (List.rev first) ^/^ string "and" ^/^ last) diff --git a/projet/src/kremlin/Utils.ml b/projet/src/kremlin/Utils.ml new file mode 100644 index 0000000..1ac1256 --- /dev/null +++ b/projet/src/kremlin/Utils.ml @@ -0,0 +1,73 @@ +let try_finally f h = let result = + try + f () + with e -> + h (); + raise e + in + h (); + result + +let with_open_in file_path f = + let c = open_in_bin file_path in + try_finally (fun () -> + f c + ) (fun () -> + close_in c + ) + +let with_open_out file_path f = + let c = open_out file_path in + try_finally (fun () -> + f c + ) (fun () -> + close_out c + ) + + +let cp dst src = with_open_out dst (fun oc -> + with_open_in src (fun ic -> + let buf = Bytes.create 2048 in + while + let l = input ic buf 0 2048 in + if l > 0 then begin + output oc buf 0 l; + true + end else + false + do () done + )) + + +let read ic = + let buf = Buffer.create 4096 in + let s = String.create 2048 in + while begin + let l = input ic s 0 (String.length s) in + if l > 0 then begin + Buffer.add_string buf (String.sub s 0 l); + true + end else begin + false + end + end do () done; + Buffer.contents buf + +let file_get_contents f = + with_open_in f read + +(** Sniff the size of the terminal for optimal use of the width. *) +let theight, twidth = + let height, width = ref 0, ref 0 in + match + Scanf.sscanf (List.hd (Process.read_stdout "stty" [|"size"|])) "%d %d" (fun h w -> + height := h; + width := w); + !height, !width + with + | exception _ -> + 24, 80 + | 0, 0 -> + 24, 80 + | h, w -> + h, w diff --git a/projet/src/prologue.h b/projet/src/prologue.h new file mode 100644 index 0000000..239161f --- /dev/null +++ b/projet/src/prologue.h @@ -0,0 +1,83 @@ +/* A forward declaration of [block] -- see below. */ + +struct block; + +/* -------------------------------------------------------------------------- */ + +/* The type [univ] is the universal type of the values that we manipulate. + A value is either an integer or a pointer to a heap-allocated memory + block. A C union is used to represent this disjunction. There is no tag + to distinguish between the two alternatives! The source program had + better be well-typed. */ + +typedef union { + + /* Either this is an integer... */ + int literal; + + /* ... or this is a pointer to a block. */ + struct block* pointer; + +} univ; + +/* -------------------------------------------------------------------------- */ + +/* The struct [block] describes the uniform layout of a heap-allocated + memory block. A block begins with an integer tag and continues with a + sequence of fields of type [univ]. */ + +struct block { + + /* Every memory block begins with an integer tag. */ + int tag; + + /* Then, we have a sequence of fields, whose number depends on the tag. + This idiom is known in C99 as a flexible array member. */ + univ data[]; + +}; + +/* -------------------------------------------------------------------------- */ + +/* Basic operations on memory blocks. */ + +/* The macro [ALLOC(n)] allocates a block of [n] fields, and is used in a + context where an expression of type [univ] is expected. We use a C + "compound literal" containing a "designated initializer" to indicate + that we wish to choose the "pointer" side of the union. We implicitly + assume that the compiler inserts no padding between the "tag" and "data" + parts of a memory block, so [(n + 1) * sizeof(univ)] is enough space to + hold a block of [n] fields. */ + +#define ALLOC(n) \ + (univ) { .pointer = malloc((n + 1) * sizeof(univ)) } + +/* In the following macros, [u] has type [univ], so [u.pointer] has type + [struct block] and is (or should be) the address of a memory block. + [i] is a field number; the numbering of fields is 0-based. */ + +#define GET_TAG(u) \ + (u.pointer->tag) + +#define SET_TAG(u,t) \ + (u.pointer->tag = t) + +#define GET_FIELD(u,i) \ + (u.pointer->data[i]) + +#define SET_FIELD(u,i,v) \ + (u.pointer->data[i]=v) + +/* -------------------------------------------------------------------------- */ + +/* Basic operations on integers. */ + +/* The macro [FROM_INT(i)] converts the integer [i] to the type [univ]. */ + +#define FROM_INT(i) \ + (univ) { .literal = i } + +/* The macro [TO_INT(u)] converts [u] to the type [int]. */ + +#define TO_INT(u) \ + u.literal diff --git a/projet/src/test/.gitignore b/projet/src/test/.gitignore new file mode 100644 index 0000000..55b60e0 --- /dev/null +++ b/projet/src/test/.gitignore @@ -0,0 +1 @@ +test.native diff --git a/projet/src/test/.merlin b/projet/src/test/.merlin new file mode 100644 index 0000000..1a1fd25 --- /dev/null +++ b/projet/src/test/.merlin @@ -0,0 +1 @@ +B _build diff --git a/projet/src/test/Makefile b/projet/src/test/Makefile new file mode 100644 index 0000000..d66fadf --- /dev/null +++ b/projet/src/test/Makefile @@ -0,0 +1,21 @@ +OCAMLBUILD := ocamlbuild -use-ocamlfind -classic-display +TARGET := test.native +TESTS := ../tests + +.PHONY: all test expected clean + +all: + @ $(OCAMLBUILD) $(TARGET) + +test: all + @ $(MAKE) -C .. + @ ./$(TARGET) --verbosity 1 + +expected: all + @ $(MAKE) -C .. + @ ./$(TARGET) --verbosity 1 --create-expected + +clean: + @ rm -f *~ *.native *.byte + @ $(OCAMLBUILD) -clean + @ rm -f $(TESTS)/*.c $(TESTS)/*.exe $(TESTS)/*.out $(TESTS)/*.err $(TESTS)/*~ diff --git a/projet/src/test/_tags b/projet/src/test/_tags new file mode 100644 index 0000000..749aa28 --- /dev/null +++ b/projet/src/test/_tags @@ -0,0 +1,3 @@ +true: \ + warn(A-44), \ + package(unix) diff --git a/projet/src/test/auxiliary.ml b/projet/src/test/auxiliary.ml new file mode 100644 index 0000000..a7bd0c4 --- /dev/null +++ b/projet/src/test/auxiliary.ml @@ -0,0 +1,138 @@ +type filename = string +type command = string + +let has_suffix suffix name = + Filename.check_suffix name suffix + +let fail format = + Printf.kprintf failwith format + +let try_finally f h = + let result = try f() with e -> h(); raise e in + h(); result + +let with_process_code_result command (f : in_channel -> 'a) : int * 'a = + let ic = Unix.open_process_in command in + set_binary_mode_in ic false; + match f ic with + | exception e -> + ignore (Unix.close_process_in ic); raise e + | result -> + match Unix.close_process_in ic with + | Unix.WEXITED code -> + code, result + | Unix.WSIGNALED _ + | Unix.WSTOPPED _ -> + 99 (* arbitrary *), result + +let with_process_result command (f : in_channel -> 'a) : 'a = + let code, result = with_process_code_result command f in + if code = 0 then + result + else + fail "Command %S failed with exit code %d." command code + +let with_open_in filename (f : in_channel -> 'a) : 'a = + let ic = open_in filename in + try_finally + (fun () -> f ic) + (fun () -> close_in_noerr ic) + +let with_open_out filename (f : out_channel -> 'a) : 'a = + let oc = open_out filename in + try_finally + (fun () -> f oc) + (fun () -> close_out_noerr oc) + +let chunk_size = + 16384 + +let exhaust (ic : in_channel) : string = + let buffer = Buffer.create chunk_size in + let chunk = Bytes.create chunk_size in + let rec loop () = + let length = input ic chunk 0 chunk_size in + if length = 0 then + Buffer.contents buffer + else begin + Buffer.add_subbytes buffer chunk 0 length; + loop() + end + in + loop() + +let read_whole_file filename = + with_open_in filename exhaust + +let absolute_directory (path : string) : string = + try + let pwd = Sys.getcwd() in + Sys.chdir path; + let result = Sys.getcwd() in + Sys.chdir pwd; + result + with Sys_error _ -> + Printf.fprintf stderr "Error: the directory %s does not seem to exist.\n" path; + exit 1 + +let get_number_of_cores () = + try match Sys.os_type with + | "Win32" -> + int_of_string (Sys.getenv "NUMBER_OF_PROCESSORS") + | _ -> + with_process_result "getconf _NPROCESSORS_ONLN" (fun ic -> + let ib = Scanf.Scanning.from_channel ic in + Scanf.bscanf ib "%d" (fun n -> n) + ) + with + | Not_found + | Sys_error _ + | Failure _ + | Scanf.Scan_failure _ + | End_of_file + | Unix.Unix_error _ -> + 1 + +let silent command : command = + command ^ " >/dev/null 2>/dev/null" + +(* [groups eq xs] segments the list [xs] into a list of groups, where several + consecutive elements belong in the same group if they are equivalent in the + sense of the function [eq]. *) + +(* The auxiliary function [groups1] deals with the case where we have an open + group [group] of which [x] is a member. The auxiliary function [group0] + deals with the case where we have no open group. [groups] is the list of + closed groups found so far, and [ys] is the list of elements that remain to + be examined. *) + +let rec groups1 eq groups x group ys = + match ys with + | [] -> + group :: groups + | y :: ys -> + if eq x y then + groups1 eq groups x (y :: group) ys + else + groups0 eq (List.rev group :: groups) (y :: ys) + +and groups0 eq groups ys = + match ys with + | [] -> + groups + | y :: ys -> + groups1 eq groups y [y] ys + +let groups eq (xs : 'a list) : 'a list list = + List.rev (groups0 eq [] xs) + +(* [sep ss] separates the nonempty strings in the list [ss] with a space, + and concatenates everything, producing a single string. *) + +let sep (ss : string list) : string = + let ss = List.filter (fun s -> String.length s > 0) ss in + match ss with + | [] -> + "" + | s :: ss -> + List.fold_left (fun s1 s2 -> s1 ^ " " ^ s2) s ss diff --git a/projet/src/test/test.ml b/projet/src/test/test.ml new file mode 100644 index 0000000..fe98195 --- /dev/null +++ b/projet/src/test/test.ml @@ -0,0 +1,288 @@ +open Sys +open Array +open List +open Filename +open Printf +open Auxiliary + +(* -------------------------------------------------------------------------- *) + +(* Settings. *) + +let create_expected = + ref false + +let verbosity = + ref 0 + +let usage = + sprintf "Usage: %s\n" argv.(0) + +let spec = Arg.align [ + "--create-expected", Arg.Set create_expected, + " recreate the expected-output files"; + "--verbosity", Arg.Int ((:=) verbosity), + " set the verbosity level (0-2)"; +] + +let () = + Arg.parse spec (fun _ -> ()) usage + +let create_expected = + !create_expected + +let verbosity = + !verbosity + +(* -------------------------------------------------------------------------- *) + +(* Logging. *) + +(* 0 is minimal verbosity; + 1 shows some progress messages; + 2 is maximal verbosity. *) + +let log level format = + kprintf (fun s -> + if level <= verbosity then + print_string s + ) format + +(* Extend [fail] to display an information message along the way. + The message is immediately emitted by the worker, depending on + the verbosity level, whereas the failure message is sent back + to the master. *) + +let fail id format = + log 1 "[FAIL] %s\n%!" id; + fail format + +(* When issuing an external command, log it along the way. *) + +let command cmd = + log 2 "%s\n%!" cmd; + command cmd + +(* -------------------------------------------------------------------------- *) + +(* Paths. *) + +let root = + absolute_directory ".." + +let src = + root + +let good = + root ^ "/tests" + +let good_slash filename = + good ^ "/" ^ filename + +let joujou = + src ^ "/joujou" + +let gcc = + sprintf "gcc -O2 -I %s" src + +(* -------------------------------------------------------------------------- *) + +(* Test files. *) + +let thisfile = + "this input file" + +let lambda basename = + basename ^ ".lambda" + +(* -------------------------------------------------------------------------- *) + +(* Test inputs and outputs. *) + +(* A test input is a basename, without the .lambda extension. *) + +type input = + | PositiveTest of filename + +type inputs = input list + +let print_input = function + | PositiveTest basename -> + basename + +type outcome = + | OK + | Fail of string (* message *) + +let print_outcome = function + | OK -> + "" + | Fail msg -> + msg + +type output = + input * outcome + +type outputs = output list + +let print_output (input, outcome) = + printf "\n[FAIL] %s\n%s" + (print_input input) + (print_outcome outcome) + +(* -------------------------------------------------------------------------- *) + +(* Auxiliary functions. *) + +let check_expected directory id result expected = + let cmd = sep ["cd"; directory; "&&"; "cp"; "-f"; result; expected] in + let copy() = + if command cmd <> 0 then + fail id "Failed to create %s.\n" expected + in + (* If we are supposed to create the [expected] file, do so. *) + if create_expected then + copy() + (* Otherwise, check that the file [expected] exists. If it does not exist, + create it by renaming [result] to [expected], then fail and invite the + user to review the newly created file. *) + else if not (file_exists (directory ^ "/" ^ expected)) then begin + copy(); + let cmd = sep ["more"; directory ^ "/" ^ expected] in + fail id "The file %s did not exist.\n\ + I have just created it. Please review it.\n %s\n" + expected cmd + end + +(* -------------------------------------------------------------------------- *) + +(* Running a positive test. *) + +(* + Conventions: + The file %.c stores the output of joujou. + The file %.exe is the compiled program produced by gcc. + The file %.out stores the output of the compiled program. + The file %.exp stores the expected output. + The file %.err stores error output (at several stages). + *) + +let process_positive_test (base : string) : unit = + + (* Display an information message. *) + log 2 "Testing %s...\n%!" base; + + (* A suggested command. *) + let more filename = + sep ["more"; good_slash filename] + in + + (* Run joujou. *) + let source = lambda base in + let c = base ^ ".c" in + let errors = base ^ ".err" in + let cmd = sep ( + "cd" :: good :: "&&" :: + joujou :: source :: sprintf ">%s" c :: sprintf "2>%s" errors :: [] + ) in + if command cmd <> 0 then + fail base "joujou fails on this source file.\n\ + To see the source file:\n %s\n\ + To see the error messages:\n %s\n" + (more source) (more errors); + + (* Run the C compiler. *) + let binary = base ^ ".exe" in + let errors = base ^ ".err" in + let cmd = sep ( + "cd" :: good :: "&&" :: + gcc :: c :: "-o" :: binary :: sprintf "2>%s" errors :: [] + ) in + if command cmd <> 0 then + fail base "The C compiler rejects the C file.\n\ + To see the C file:\n %s\n\ + To see the compiler's error messages:\n %s\n" + (more c) (more errors); + + (* Run the compiled program. *) + let out = base ^ ".out" in + let cmd = sep ( + "cd" :: good :: "&&" :: + sprintf "./%s" binary :: sprintf ">%s" out :: "2>&1" :: [] + ) in + if command cmd <> 0 then + fail base "The compiled program fails.\n\ + To see its output:\n %s\n" (more out); + + (* Check that the file [exp] exists. *) + let exp = base ^ ".exp" in + check_expected good base out exp; + + (* Check that the output coincides with what was expected. *) + let cmd = sep ("cd" :: good :: "&&" :: "diff" :: exp :: out :: []) in + if command (silent cmd) <> 0 then + fail base "joujou accepts this input file, but produces incorrect output.\n\ + To see the difference:\n (%s)\n" + cmd; + + (* Succeed. *) + log 1 "[OK] %s\n%!" base + +(* -------------------------------------------------------------------------- *) + +(* Running a test. *) + +let process input : output = + try + begin match input with + | PositiveTest basenames -> + process_positive_test basenames + end; + input, OK + with Failure msg -> + input, Fail msg + +(* -------------------------------------------------------------------------- *) + +(* [run] runs a bunch of tests in sequence. *) + +let run (inputs : inputs) : outputs = + flush stdout; flush stderr; + let outputs = map process inputs in + outputs + +(* -------------------------------------------------------------------------- *) + +(* Main. *) + +(* Menhir can accept several .mly files at once. By convention, if several + files have the same name up to a numeric suffix, then they belong in a + single group and should be fed together to Menhir. *) + +let inputs directory : filename list = + readdir directory + |> to_list + |> filter (has_suffix ".lambda") + |> map chop_extension + |> sort compare + +let positive : inputs = + inputs good + |> map (fun basename -> PositiveTest basename) + +let inputs = + positive + +let outputs : outputs = + printf "Preparing to run %d tests...\n%!" (length inputs); + run inputs + +let successful, failed = + partition (fun (_, o) -> o = OK) outputs + +let () = + printf "%d out of %d tests are successful.\n" + (length successful) (length inputs); + failed |> iter (fun (input, outcome) -> + printf "\n[FAIL] %s\n%s" (print_input input) (print_outcome outcome) + ) diff --git a/projet/src/tests/.gitignore b/projet/src/tests/.gitignore new file mode 100644 index 0000000..9038d29 --- /dev/null +++ b/projet/src/tests/.gitignore @@ -0,0 +1,4 @@ +*.err +*.out +*.exe +*.c diff --git a/projet/src/tests/bool.exp b/projet/src/tests/bool.exp new file mode 100644 index 0000000..829400e --- /dev/null +++ b/projet/src/tests/bool.exp @@ -0,0 +1,20 @@ +1 +0 +1 +0 +0 +24 +120 +8 +13 +5 +0 +1 +7 +1 +1 +2 +6 +24 +120 +42 diff --git a/projet/src/tests/bool.lambda b/projet/src/tests/bool.lambda new file mode 100644 index 0000000..6eaf5da --- /dev/null +++ b/projet/src/tests/bool.lambda @@ -0,0 +1,88 @@ +(* Thunks. *) +let force = fun x -> x 0 in +(* Church Booleans. *) +let false = fun x -> fun y -> y in +let true = fun x -> fun y -> x in +let k = true in +let cond = fun p -> fun x -> fun y -> p x y in +let forcecond = fun p -> fun x -> fun y -> force (p x y) in +let bool2int = fun b -> cond b 1 0 in +let _ = print (bool2int true) in +let _ = print (bool2int false) in +(* Church pairs. *) +let pair = fun x -> fun y -> fun p -> cond p x y in +let fst = fun p -> p true in +let snd = fun p -> p false in +(* Church naturals. *) +let zero = fun f -> fun x -> x in +let one = fun f -> fun x -> f x in +let plus = fun m -> fun n -> fun f -> fun x -> m f (n f x) in +let two = plus one one in +let three = plus one two in +let four = plus two two in +let five = plus four one in +let six = plus four two in +let succ = plus one in +let mult = fun m -> fun n -> fun f -> m (n f) in +let exp = fun m -> fun n -> n m in +let is_zero = fun n -> n (k false) true in +let convert = fun n -> n (fun x -> x + 1) 0 in +let _ = print (bool2int (is_zero zero)) in +let _ = print (bool2int (is_zero one)) in +let _ = print (bool2int (is_zero two)) in +(* Factorial, based on Church naturals. *) +let loop = fun p -> + let n = succ (fst p) in + pair n (mult n (snd p)) +in +let fact = fun n -> + snd (n loop (pair zero one)) +in +let _ = print (convert (fact four)) in +let _ = print (convert (fact five)) in +(* Fibonacci, based on Church naturals. *) +let loop = fun p -> + let fib1 = fst p in + pair (plus fib1 (snd p)) fib1 +in +let fib = fun n -> + snd (n loop (pair one one)) +in +let _ = print (convert (fib five)) in +let _ = print (convert (fib six)) in +(* Predecessor. *) +let loop = fun p -> + let pred = fst p in + pair (succ pred) pred +in +let pred = fun n -> + snd (n loop (pair zero zero)) +in +let _ = print (convert (pred six)) in +(* Comparison, yielding a Church Boolean. *) +let geq = fun m -> fun n -> + m pred n (k false) true +in +let _ = print (bool2int (geq four six)) in +let _ = print (bool2int (geq six one)) in +(* Iteration. *) +let iter = fun f -> fun n -> + n f (f one) +in +(* Ackermann's function. *) +let ack = fun n -> n iter succ n in +let _ = print (convert (ack two)) in +(* A fixpoint. *) +let fix = fun f -> (fun y -> f (fun z -> y y z)) (fun y -> f (fun z -> y y z)) in +(* Another version of fact. *) +let fact = fun n -> + fix (fun fact -> fun n -> forcecond (is_zero n) (fun _ -> one) (fun _ -> mult n (fact (pred n)))) n +in +let _ = print (convert (fact zero)) in +let _ = print (convert (fact one)) in +let _ = print (convert (fact two)) in +let _ = print (convert (fact three)) in +let _ = print (convert (fact four)) in +let _ = print (convert (fact five)) in + +print 42 diff --git a/projet/src/tests/church.exp b/projet/src/tests/church.exp new file mode 100755 index 0000000..d81cc07 --- /dev/null +++ b/projet/src/tests/church.exp @@ -0,0 +1 @@ +42 diff --git a/projet/src/tests/church.lambda b/projet/src/tests/church.lambda new file mode 100644 index 0000000..0cbd2e6 --- /dev/null +++ b/projet/src/tests/church.lambda @@ -0,0 +1,16 @@ +let i = fun x -> x in +let k = fun x -> fun y -> x in +let zero = fun f -> i in +let one = fun f -> fun x -> f x in +let plus = fun m -> fun n -> fun f -> fun x -> m f (n f x) in +let succ = plus one in +let mult = fun m -> fun n -> fun f -> m (n f) in +let exp = fun m -> fun n -> n m in +let two = succ one in +let three = succ two in +let six = mult two three in +let seven = succ six in +let twenty_one = mult three seven in +let forty_two = mult two twenty_one in +let convert = fun n -> n (fun x -> x + 1) 0 in +print (convert forty_two) diff --git a/projet/src/tests/hello.exp b/projet/src/tests/hello.exp new file mode 100755 index 0000000..d81cc07 --- /dev/null +++ b/projet/src/tests/hello.exp @@ -0,0 +1 @@ +42 diff --git a/projet/src/tests/hello.lambda b/projet/src/tests/hello.lambda new file mode 100644 index 0000000..86f387e --- /dev/null +++ b/projet/src/tests/hello.lambda @@ -0,0 +1 @@ +print (21 + 21) diff --git a/projet/src/tests/loop/loop.lambda b/projet/src/tests/loop/loop.lambda new file mode 100644 index 0000000..0629593 --- /dev/null +++ b/projet/src/tests/loop/loop.lambda @@ -0,0 +1,10 @@ +(* This program is supposed to never terminate. + This can actually work if the C compiler is + smart enough to recognize and optimize tail + calls. It works for me with clang but not with + gcc, for some reason. *) +let rec loop = fun x -> + let _ = print x in + loop (x + 1) +in +loop 0 diff --git a/projet/src/tests/printprint.exp b/projet/src/tests/printprint.exp new file mode 100755 index 0000000..daaac9e --- /dev/null +++ b/projet/src/tests/printprint.exp @@ -0,0 +1,2 @@ +42 +42 diff --git a/projet/src/tests/printprint.lambda b/projet/src/tests/printprint.lambda new file mode 100644 index 0000000..f0501dd --- /dev/null +++ b/projet/src/tests/printprint.lambda @@ -0,0 +1,2 @@ +(* Because [print] returns its argument, this program should print 42 twice. *) +print (print 42) diff --git a/projet/sujet.pdf b/projet/sujet.pdf new file mode 100644 index 0000000..b7f0a6d Binary files /dev/null and b/projet/sujet.pdf differ