ITエンジニアのブログ

IT企業でエンジニアやってる人間の日常について

プログラミング言語を作る。第9回:型検査

第8回で計算式の平坦化を行いましたが、それは全て Int 型を想定して行っていました。拡張として、複数の型を導入して、型検査を実行してみようと思います。例えば、プラスの記号の両側には同じ型の値が与えられ、それらと同じ型の値を返すというように決めておきます。

1 + 2 の解釈

  1. 1 : Int
  2. 2 : Int
  3. 1 + 2 : Int(ok) + Int(ok) -> Int

1 + 3.0f の解釈

  1. 1 : Int
  2. 3.0f : Float
  3. 1 + 3.0f : Int + Float (ng)

実装したプログラムに対する入力と出力を先にあげます。

Int printInt(Int i){
    return 1;
}

Int main(){
    Int i;
    i = 2 + 3 * 4 + 5 % 6;
    Float f;
    f = 2.3f + 4.2f * 3.0f - 10.0f / 2.4f;
    Bool b;
    b = i > 10;
    printInt(i);
    return 0;
}

という入力を入れると次のようになります。

[Func(Int, printInt, [(Int, i)], [Return(Int 1)]); Func(Int, main, [], [Let(Int, i); Assign(i, Add(Add(Int 2, Mul(Int 3, Int 4)), Mod(Int 5, Int 6))); Let(Float, f); Assign(f, Sub(Add(Float 2.3, Mul(Float 4.2, Float 3.)), Div(Float 10., Float 2.4))); Let(Bool, b); Assign(b, Gt(Var i, Int 10)); Exp(App(printInt, [Var i])); Return(Int 0)])]

Int printInt (Int i){
  LoadInt Int %1 1
  Ret Int %1
}

Int main (){
  LoadInt Int %1 2
  LoadInt Int %2 3
  LoadInt Int %3 4
  Mul Int %4 Int %2 Int %3
  Add Int %5 Int %1 Int %4
  LoadInt Int %6 5
  LoadInt Int %7 6
  Mod Int %8 Int %6 Int %7
  Add Int %9 Int %5 Int %8
  Mov Int %0 Int %9
  LoadFloat Float %11 2.3
  LoadFloat Float %12 4.2
  LoadFloat Float %13 3.
  FMul Float %14 Float %12 Float %13
  FAdd Float %15 Float %11 Float %14
  LoadFloat Float %16 10.
  LoadFloat Float %17 2.4
  FDiv Float %18 Float %16 Float %17
  FSub Float %19 Float %15 Float %18
  Mov Float %10 Float %19
  LoadInt Int %21 10
  Gt Bool %22 Int %0 Int %21
  Mov Bool %20 Bool %22
  App Int %23 printInt(Int %0)
  LoadInt Int %24 0
  Ret Int %24
}

見事、型を自動で解釈し、検査してくれました。入力の 4.2f を 4 とする、 printInt(i) を printInt(b) にする、 Int i; の宣言を消すなどすると、エラーを出力してくれます。

以下がソースコードです。
tokenizer.mll

{
    open Parser

    exception TokenizeError of string
}

let digit = ['0' - '9']
let lower = ['a' - 'z']
let upper = ['A' - 'Z']

rule tokenize = parse
    | [' ' '\t' '\n'] { tokenize lexbuf }
    | digit+ as v { INTLI (int_of_string v) }
    | digit+ ['l' 'L'] as v { LONGLI (Int64.of_string (String.sub v 0 (String.length v - 1))) }
    | digit+ "." digit+ ['f' 'F'] as v { FLOATLI (float_of_string (String.sub v 0 (String.length v - 1))) }
    | "{" { LMPAR }
    | "}" { RMPAR }
    | ";" { SCOLON }
    | "=" { EQUAL }
    | "(" { LPAR }
    | ")" { RPAR }
    | "+" { PLUS }
    | "-" { MINUS }
    | "*" { STAR }
    | "/" { SLASH }
    | "%" { PERCENT }
    | "," { COMMA }
    | "<" { LT }
    | ">" { GT }
    | "<=" { LE }
    | ">=" { GE }
    | "==" { EQ }
    | "True" { BOOLLI true }
    | "False" { BOOLLI false }
    | "return" { RETURN }
    | lower (lower | upper | digit | "_")* as v { LOWLI v }
    | upper (lower | upper | digit | "_") * as v { UPLI v }
    | eof { EOF }
    | _ { raise (TokenizeError "illegal character") }

parser.mly

%{
    open Syntax
    exception ParseError of string
    let parse_error s = raise (ParseError (s ^ " " ^ (string_of_int (Parsing.symbol_start ())) ^ " - " ^ (string_of_int (Parsing.symbol_end ()))))
%}

%token <int> INTLI
%token <float> FLOATLI
%token <string> UPLI LOWLI
%token <bool> BOOLLI
%token <int64> LONGLI
%token LT GT LE GE EQ
%token LMPAR RMPAR LPAR RPAR
%token SCOLON EQUAL COMMA
%token PLUS MINUS STAR PERCENT SLASH
%token RETURN
%token EOF

%start parse
%type <Syntax.funstate list> parse

%%

parse:
    | EOF { [] }
    | funstates EOF { $1 }
funstates:
    | funstate funstates { $1 :: $2 }
    | funstate { [$1] }
funstate:
    | UPLI LOWLI LPAR RPAR LMPAR states RMPAR { Func($1, $2, [], $6) }
    | UPLI LOWLI LPAR funargs RPAR LMPAR states RMPAR { Func($1, $2, $4, $7) }
funargs:
    | UPLI LOWLI COMMA funargs { ($1, $2) :: $4 }
    | UPLI LOWLI { [($1, $2)] }
states:
    | state states { $1 :: $2 }
    | state { [$1] }
state:
    | UPLI LOWLI SCOLON { Let($1, $2) }
    | LOWLI EQUAL expp0 SCOLON { Assign($1, $3) }
    | RETURN expp0 SCOLON { Return $2 }
    | expp0 SCOLON { Exp $1 }
expp0:
    | exp0 LT exp0 { Lt($1, $3) }
    | exp0 GT exp0 { Gt($1, $3) }
    | exp0 LE exp0 { Le($1, $3) }
    | exp0 GE exp0 { Ge($1, $3) }
    | exp0 EQ exp0 { Eq($1, $3) }
    | exp0 { $1 }
exp0:
    | exp0 PLUS exp1 { Add($1, $3) }
    | exp0 MINUS exp1 { Sub($1, $3) }
    | exp1 { $1 }
exp1:
    | exp1 STAR exp2 { Mul($1, $3) }
    | exp1 PERCENT exp2 { Mod($1, $3) }
    | exp1 SLASH exp2 { Div($1, $3) }
    | exp2 { $1 }
exp2:
    | MINUS exp3 { Minus($2) }
    | exp3 { $1 }
exp3:
    | LOWLI LPAR RPAR { App($1, []) }
    | LOWLI LPAR exps RPAR { App($1, $3) }
    | exp4 { $1 }
exp4:
    | LPAR expp0 RPAR { $2 }
    | INTLI { Int $1 }
    | LOWLI { Var $1 }
    | FLOATLI { Float $1 }
    | BOOLLI { Bool $1 }
    | LONGLI { Long $1 }
exps:
    | expp0 COMMA exps { $1 :: $3 }
    | expp0 { [$1] }
;

syntax.ml

open Base

type exp =
    | Int of int
    | Float of float
    | Bool of bool
    | Long of int64
    | Add of exp * exp
    | Sub of exp * exp
    | Mul of exp * exp
    | Div of exp * exp
    | Mod of exp * exp
    | Minus of exp
    | Var of string
    | App of string * exp list
    | Le of exp * exp
    | Ge of exp * exp
    | Lt of exp * exp
    | Gt of exp * exp
    | Eq of exp * exp

type state =
    | Let of string * string
    | Assign of string * exp
    | Return of exp
    | Exp of exp

type funstate =
    | Func of string * string * (string * string) list * state list

let rec string_of_exp = function
    | Int i -> "Int " ^ string_of_int i
    | Float f -> "Float " ^ string_of_float f
    | Bool b -> "Bool " ^ (if b then "true" else "false")
    | Long l -> "Long " ^ Int64.to_string l
    | Var s -> "Var " ^ s
    | Add(e1, e2) -> "Add(" ^ string_of_exp e1 ^ ", " ^ string_of_exp e2 ^ ")"
    | Sub(e1, e2) -> "Sub(" ^ string_of_exp e1 ^ ", " ^ string_of_exp e2 ^ ")"
    | Mul(e1, e2) -> "Mul(" ^ string_of_exp e1 ^ ", " ^ string_of_exp e2 ^ ")"
    | Div(e1, e2) -> "Div(" ^ string_of_exp e1 ^ ", " ^ string_of_exp e2 ^ ")"
    | Mod(e1, e2) -> "Mod(" ^ string_of_exp e1 ^ ", " ^ string_of_exp e2 ^ ")"
    | Le(e1, e2) -> "Le(" ^ string_of_exp e1 ^ ", " ^ string_of_exp e2 ^ ")"
    | Ge(e1, e2) -> "Ge(" ^ string_of_exp e1 ^ ", " ^ string_of_exp e2 ^ ")"
    | Lt(e1, e2) -> "Lt(" ^ string_of_exp e1 ^ ", " ^ string_of_exp e2 ^ ")"
    | Gt(e1, e2) -> "Gt(" ^ string_of_exp e1 ^ ", " ^ string_of_exp e2 ^ ")"
    | Eq(e1, e2) -> "Eq(" ^ string_of_exp e1 ^ ", " ^ string_of_exp e2 ^ ")"
    | Minus e -> "Minus(" ^ string_of_exp e ^ ")"
    | App(s, es) -> "App(" ^ s ^ ", [" ^  join "; " (List.map string_of_exp es) ^ "])"

let string_of_state = function
    | Let(u, s) -> "Let(" ^ u ^ ", " ^ s ^ ")"
    | Assign(s, e) -> "Assign(" ^ s ^ ", " ^ string_of_exp e ^ ")"
    | Return e -> "Return(" ^ string_of_exp e ^ ")"
    | Exp e -> "Exp(" ^ string_of_exp e ^ ")"

let string_of_funstate = function
    | Func(u, s, args, states) -> "Func(" ^ u ^ ", " ^ s ^ ", [" ^ join "; " (List.map (fun (x, y) -> "(" ^ x ^ ", " ^ y ^ ")") args) ^ "], " ^ ("[" ^ join "; " (List.map string_of_state states) ^ "]") ^ ")"

base.ml

let rec join s = function
    | [] -> ""
    | [x] -> x
    | x :: xs -> x ^ s ^ join s xs

kNormal.ml

open Base

exception Error of string

type datatype =
    | Bool
    | Int
    | Long
    | Float
    | Fun of datatype list * datatype

type var = datatype * string

type state =
    | LoadInt of var * int
    | LoadBool of var * bool
    | LoadFloat of var * float
    | LoadLong of var * int64
    | Add of var * var * var
    | Sub of var * var * var
    | Mul of var * var * var
    | Div of var * var * var
    | Mod of var * var * var
    | Neg of var * var
    | FAdd of var * var * var
    | FSub of var * var * var
    | FMul of var * var * var
    | FDiv of var * var * var
    | FMod of var * var * var
    | FNeg of var * var
    | Le of var * var * var
    | Ge of var * var * var
    | Lt of var * var * var
    | Gt of var * var * var
    | Eq of var * var * var
    | Mov of var * var
    | App of var * string * var list
    | Ret of var

type funstate =
    | Func of datatype * string * (datatype * string) list * state list

let string_of_datatype = function
    | Int -> "Int"
    | Bool -> "Bool"
    | Float -> "Float"
    | Long -> "Long"
    | _ -> raise (Error "string_of_datatype")

let string_to_datatype = function
    | "Bool" -> Bool
    | "Int" -> Int
    | "Float" -> Float
    | "Long" -> Long
    | _ -> raise (Failure "type not exist")

let string_of_var (t, s) = string_of_datatype t ^ " " ^ s

let debug_of_state = function
    | LoadInt(s, i) -> "LoadInt " ^ (string_of_var s) ^ " " ^ string_of_int i
    | LoadBool(s, b) -> "LoadBool " ^ (string_of_var s) ^ " " ^ (if b then "true" else "false")
    | LoadFloat(s, f) -> "LoadFloat " ^ (string_of_var s) ^ " " ^ string_of_float f
    | LoadLong(s, l) -> "LoadLong " ^ (string_of_var s) ^ " " ^ Int64.to_string l
    | Add(s0, s1, s2) -> "Add " ^ join " " (List.map string_of_var [s0; s1; s2])
    | Sub(s0, s1, s2) -> "Sub " ^ join " " (List.map string_of_var [s0; s1; s2])
    | Mul(s0, s1, s2) -> "Mul " ^ join " " (List.map string_of_var [s0; s1; s2])
    | Div(s0, s1, s2) -> "Div " ^ join " " (List.map string_of_var [s0; s1; s2])
    | Mod(s0, s1, s2) -> "Mod " ^ join " " (List.map string_of_var [s0; s1; s2])
    | Neg(s0, s1) -> "Neg " ^ join " " (List.map string_of_var [s0; s1])
    | FAdd(s0, s1, s2) -> "FAdd " ^ join " " (List.map string_of_var [s0; s1; s2])
    | FSub(s0, s1, s2) -> "FSub " ^ join " " (List.map string_of_var [s0; s1; s2])
    | FMul(s0, s1, s2) -> "FMul " ^ join " " (List.map string_of_var [s0; s1; s2])
    | FDiv(s0, s1, s2) -> "FDiv " ^ join " " (List.map string_of_var [s0; s1; s2])
    | FMod(s0, s1, s2) -> "FMod " ^ join " " (List.map string_of_var [s0; s1; s2])
    | FNeg(s0, s1) -> "FNeg " ^ join " " (List.map string_of_var [s0; s1])
    | Le(s0, s1, s2) -> "Le " ^ join " " (List.map string_of_var [s0; s1; s2])
    | Ge(s0, s1, s2) -> "Ge " ^ join " " (List.map string_of_var [s0; s1; s2])
    | Lt(s0, s1, s2) -> "Lt " ^ join " " (List.map string_of_var [s0; s1; s2])
    | Gt(s0, s1, s2) -> "Gt " ^ join " " (List.map string_of_var [s0; s1; s2])
    | Eq(s0, s1, s2) -> "Eq " ^ join " " (List.map string_of_var [s0; s1; s2])
    | Mov(s0, s1) -> "Mov " ^ join " " (List.map string_of_var [s0; s1])
    | App(s0, s1, xs) -> "App " ^ (string_of_var s0) ^ " " ^ s1 ^ "(" ^ join "," (List.map string_of_var xs) ^ ")"
    | Ret s -> "Ret " ^ string_of_var s

let debug_of_funstate = function
    | Func(d, s, vs, xs) ->
        string_of_datatype d ^ " " ^ s ^ " " ^ "(" ^ join "," (List.map string_of_var vs) ^ "){\n" ^ join "\n" (List.map (fun x -> "  " ^ debug_of_state x) xs) ^ "\n}"

let make_variable i = "%" ^ string_of_int i

let rec gather_functions = function
    | [] -> []
    | Syntax.Func(t, f, args, states) :: xs -> (f, Fun(List.map (fun x -> string_to_datatype (fst x)) args, string_to_datatype t), f) :: gather_functions xs

let rec find_variable_sub z = function
    | [] -> None
    | (v, t, x) :: xs -> if z = v then Some (t, x) else find_variable_sub z xs

let rec find_variable z = function
    | [] -> None
    | l :: ls -> (match find_variable_sub z l with | Some x -> Some x | None -> find_variable z ls)

let rec check_exp i env = function
    | Syntax.Int n ->
        let s = make_variable i in
        ([LoadInt((Int, s), n)], (Int, s), i + 1)
    | Syntax.Long n ->
        let s = make_variable i in
        ([LoadLong((Long, s), n)], (Long, s), i + 1)
    | Syntax.Float n ->
        let s = make_variable i in
        ([LoadFloat((Float, s), n)], (Float, s), i + 1)
    | Syntax.Bool b ->
        let s = make_variable i in
        ([LoadBool((Bool, s), b)], (Bool, s), i + 1)
    | Syntax.Add(e1, e2) ->
        let (l, vs, j) = check_exps i env [e1; e2] in
        let (t0, s0) = List.nth vs 0 in
        let (t1, s1) = List.nth vs 1 in
        let s = make_variable j in
        if (t0 = Int || t0 = Long) && t0 = t1 then
            (l @ [Add((t0, s), (t0, s0), (t1, s1))], (t0, s), j + 1)
        else
            if t0 = Float && t0 = t1 then
                (l @ [FAdd((t0, s), (t0, s0), (t1, s1))], (t0, s), j + 1)
            else
                raise (Error "type invalid for Add")
    | Syntax.Sub(e1, e2) ->
        let (l, vs, j) = check_exps i env [e1; e2] in
        let (t0, s0) = List.nth vs 0 in
        let (t1, s1) = List.nth vs 1 in
        let s = make_variable j in
        if (t0 = Int || t0 = Long) && t0 = t1 then
            (l @ [Sub((t0, s), (t0, s0), (t1, s1))], (t0, s), j + 1)
        else
            if t0 = Float && t0 = t1 then
                (l @ [FSub((t0, s), (t0, s0), (t1, s1))], (t0, s), j + 1)
            else
                raise (Error "type invalid for Sub")
    | Syntax.Mul(e1, e2) ->
        let (l, vs, j) = check_exps i env [e1; e2] in
        let (t0, s0) = List.nth vs 0 in
        let (t1, s1) = List.nth vs 1 in
        let s = make_variable j in
        if (t0 = Int || t0 = Long) && t0 = t1 then
            (l @ [Mul((t0, s), (t0, s0), (t1, s1))], (t0, s), j + 1)
        else
            if t0 = Float && t0 = t1 then
                (l @ [FMul((t0, s), (t0, s0), (t1, s1))], (t0, s), j + 1)
            else
                raise (Error "type invalid for Mul")
    | Syntax.Div(e1, e2) ->
        let (l, vs, j) = check_exps i env [e1; e2] in
        let (t0, s0) = List.nth vs 0 in
        let (t1, s1) = List.nth vs 1 in
        let s = make_variable j in
        if (t0 = Int || t0 = Long) && t0 = t1 then
            (l @ [Div((t0, s), (t0, s0), (t1, s1))], (t0, s), j + 1)
        else
            if t0 = Float && t0 = t1 then
                (l @ [FDiv((t0, s), (t0, s0), (t1, s1))], (t0, s), j + 1)
            else
                raise (Error "type invalid for Div")
    | Syntax.Mod(e1, e2) ->
        let (l, vs, j) = check_exps i env [e1; e2] in
        let (t0, s0) = List.nth vs 0 in
        let (t1, s1) = List.nth vs 1 in
        let s = make_variable j in
        if (t0 = Int || t0 = Long) && t0 = t1 then
            (l @ [Mod((t0, s), (t0, s0), (t1, s1))], (t0, s), j + 1)
        else
            if t0 = Float && t0 = t1 then
                (l @ [FMod((t0, s), (t0, s0), (t1, s1))], (t0, s), j + 1)
            else
                raise (Error "type invalid for Mod")
    | Syntax.Minus e ->
        let (l, (t, x), j) = check_exp i env e in
        let s = make_variable j in
        if t = Int || t = Long then
            (l @ [Neg((t, s), (t, x))], (t, s), j + 1)
        else
            if t = Float then
                (l @ [FNeg((t, s), (t, x))], (t, s), j + 1)
            else
                raise (Error "type invalid for Minus")
    | Syntax.Le(e1, e2) ->
        let (l, vs, j) = check_exps i env [e1; e2] in
        let (t0, s0) = List.nth vs 0 in
        let (t1, s1) = List.nth vs 1 in
        let s = make_variable j in
        if (t0 = Int || t0 = Long || t0 = Float) && t0 = t1 then
            (l @ [Le((Bool, s), (t0, s0), (t1, s1))], (Bool, s), j + 1)
        else
            raise (Error "type invalid for Le")
    | Syntax.Ge(e1, e2) ->
        let (l, vs, j) = check_exps i env [e1; e2] in
        let (t0, s0) = List.nth vs 0 in
        let (t1, s1) = List.nth vs 1 in
        let s = make_variable j in
        if (t0 = Int || t0 = Long || t0 = Float) && t0 = t1 then
            (l @ [Ge((Bool, s), (t0, s0), (t1, s1))], (Bool, s), j + 1)
        else
            raise (Error "type invalid for Ge")
    | Syntax.Lt(e1, e2) ->
        let (l, vs, j) = check_exps i env [e1; e2] in
        let (t0, s0) = List.nth vs 0 in
        let (t1, s1) = List.nth vs 1 in
        let s = make_variable j in
        if (t0 = Int || t0 = Long || t0 = Float) && t0 = t1 then
            (l @ [Lt((Bool, s), (t0, s0), (t1, s1))], (Bool, s), j + 1)
        else
            raise (Error "type invalid for Lt")
    | Syntax.Gt(e1, e2) ->
        let (l, vs, j) = check_exps i env [e1; e2] in
        let (t0, s0) = List.nth vs 0 in
        let (t1, s1) = List.nth vs 1 in
        let s = make_variable j in
        if (t0 = Int || t0 = Long || t0 = Float) && t0 = t1 then
            (l @ [Gt((Bool, s), (t0, s0), (t1, s1))], (Bool, s), j + 1)
        else
            raise (Error "type invalid for Gt")
    | Syntax.Eq(e1, e2) ->
        let (l, vs, j) = check_exps i env [e1; e2] in
        let (t0, s0) = List.nth vs 0 in
        let (t1, s1) = List.nth vs 1 in
        let s = make_variable j in
        if (t0 = Int || t0 = Long || t0 = Float) && t0 = t1 then
            (l @ [Eq((Bool, s), (t0, s0), (t1, s1))], (Bool, s), j + 1)
        else
            raise (Error "type invalid for Eq")
    | Syntax.Var s ->
        begin
            match find_variable s env with
                | Some (t, v) -> ([], (t, v), i)
                | None -> raise (Error "no such variable to refer")
        end
    | Syntax.App(s, es) ->
        let (l, vs, j) = check_exps i env es in
        match find_variable s env with
            | Some (Fun (args, target), f) ->
                if args = List.map fst vs then
                    let nv = make_variable j in
                    (l @ [App((target, nv), f, vs)], (target, nv), j + 1)
                else
                    raise (Error "illegal type for App")
            | _ -> raise (Error "no such function")
and check_exps i env = function
    | [] -> ([], [], i)
    | x :: xs ->
        let (l, s, j) = check_exp i env x in
        let (ls, ss, k) = check_exps j env xs in
        (l @ ls, s :: ss, k)

let rec check_states ty i env = function
    | [] -> []
    | Syntax.Let(t, s) :: xs ->
        if List.exists (fun (f, _, _) -> s = f) (List.hd env) then
            raise (Failure "deplicate variable")
        else
            check_states ty (i + 1) (((s, string_to_datatype t, make_variable i) :: (List.hd env)) :: List.tl env) xs
    | Syntax.Assign(s, e) :: xs ->
        let (l, (t, x), j) = check_exp i env e in
        begin
            match find_variable s env with
                | Some (c, d) ->
                    if c = t then
                        l @ [Mov((c, d), (t, x))] @ check_states ty j env xs
                    else
                        raise (Error "type error for Mov")
                | None -> raise (Error "no such variable for assign")
        end
    | Syntax.Return e :: xs ->
        let (l, (t, x), j) = check_exp i env e in
        if t = ty then
            l @ [Ret((t, x))] @ check_states ty j env xs
        else
            raise (Error "illegal type for return")
    | Syntax.Exp e :: xs ->
        let (l, _, j) = check_exp i env e in
        l @ check_states ty j env xs

let rec read_args i = function
    | [] -> []
    | (t, s) :: xs -> (s, string_to_datatype t, make_variable i) :: read_args (i + 1) xs

let check env = function
    | Syntax.Func(t, f, args, states) -> Func(string_to_datatype t, f, List.map (fun (t, s) -> (string_to_datatype t, s)) args, check_states (string_to_datatype t) (List.length args) ((read_args 0 args) :: env) states)

let check_functions funs =
    let env = [gather_functions funs] in
    List.map (check env) funs

main.ml

open Base

let _ =
    let tree = Parser.parse Tokenizer.tokenize (Lexing.from_channel (open_in Sys.argv.(1))) in
    print_string ("[" ^ join "; " (List.map Syntax.string_of_funstate tree) ^ "]\n\n");
    print_string (join "\n\n" (List.map KNormal.debug_of_funstate (KNormal.check_functions tree)) ^ "\n")