ITエンジニアのブログ

IT企業でエンジニアやってる人間の日常について

プログラミング言語を作る。第8回:計算式の平坦化

そういえばドラゴン本を買いました。これを参考にしてコンパイラ制作を加速させます。

第5回で字句解析と構文解析を行いました。四則演算や関数適用などの簡易的な入力に対し、それを構文解析して抽象構文木を出力しましたが、今度はそれを変換します。例えば、 1 + 2 * 3 という計算式は Add(Int 1, Mul(Int 2, Int 3)) というような構文木になりますが、それを a = 1; b = 2 * 3; c = a + b というように演算子一つの複数の計算に分解します。ここでは便宜上、平坦化と読んでいますが、一般的にそういわれているわけではなく、ドラゴン本では 3 アドレスコードへの変換と称されています。簡単のため、今回は型を Int に固定しています。

まず、関数の引数を複数取れるように字句解析器と構文解析器を変更しました。

tokenizer.mll

{
    open Parser

    exception TokenizeError of string
}

let digit = ['0' - '9']
let lower = ['a' - 'z']
let upper = ['A' - 'Z']

rule tokenize = parse
    | [' ' '\t' '\n'] { tokenize lexbuf }
    | digit+ as v { INTLI (int_of_string v) }
    | "{" { LMPAR }
    | "}" { RMPAR }
    | ";" { SCOLON }
    | "=" { EQUAL }
    | "(" { LPAR }
    | ")" { RPAR }
    | "+" { PLUS }
    | "-" { MINUS }
    | "*" { STAR }
    | "/" { SLASH }
    | "%" { PERCENT }
    | "," { COMMA }
    | "return" { RETURN }
    | lower (lower | upper | digit | "_")* as v { LOWLI v }
    | upper (lower | upper | digit | "_") * as v { UPLI v }
    | eof { EOF }
    | _ { raise (TokenizeError "illegal character") }

parser.mly

%{
    open Syntax
    exception ParseError of string
    let parse_error s = raise (ParseError s)
%}

%token <int> INTLI
%token <string> UPLI LOWLI
%token LMPAR RMPAR LPAR RPAR
%token SCOLON EQUAL COMMA
%token PLUS MINUS STAR PERCENT SLASH
%token RETURN
%token EOF

%start parse
%type <Syntax.funstate> parse

%%

parse:
    | funstate EOF { $1 }
funstate:
    | UPLI LOWLI LPAR RPAR LMPAR states RMPAR { Func($1, $2, [], $6) }
    | UPLI LOWLI LPAR funargs RPAR LMPAR states RMPAR { Func($1, $2, $4, $7) }
funargs:
    | UPLI LOWLI COMMA funargs { ($1, $2) :: $4 }
    | UPLI LOWLI { [($1, $2)] }
states:
    | state states { $1 :: $2 }
    | state { [$1] }
state:
    | UPLI LOWLI SCOLON { Let($1, $2) }
    | LOWLI EQUAL exp SCOLON { Assign($1, $3) }
    | RETURN exp SCOLON { Return $2 }
    | exp SCOLON { Exp $1 }
exp:
    | exp PLUS exp1 { Add($1, $3) }
    | exp MINUS exp1 { Sub($1, $3) }
    | exp1 { $1 }
exp1:
    | exp1 STAR exp2 { Mul($1, $3) }
    | exp1 PERCENT exp2 { Mod($1, $3) }
    | exp1 SLASH exp2 { Div($1, $3) }
    | exp2 { $1 }
exp2:
    | MINUS exp3 { Minus($2) }
    | exp3 { $1 }
exp3:
    | LOWLI LPAR RPAR { App($1, []) }
    | LOWLI LPAR exps RPAR { App($1, $3) }
    | exp4 { $1 }
exp4:
    | LPAR exp RPAR { $2 }
    | INTLI { Int $1 }
    | LOWLI { Var $1 }
exps:
    | exp COMMA exps { $1 :: $3 }
    | exp { [$1] }
;

syntax.ml も変更しました。

syntax.ml

open Base

type exp =
    | Int of int
    | Add of exp * exp
    | Sub of exp * exp
    | Mul of exp * exp
    | Div of exp * exp
    | Mod of exp * exp
    | Minus of exp
    | Var of string
    | App of string * exp list

type state =
    | Let of string * string
    | Assign of string * exp
    | Return of exp
    | Exp of exp

type funstate =
    | Func of string * string * (string * string) list * state list

let rec string_of_exp = function
    | Int i -> "Int " ^ string_of_int i
    | Var s -> "Var " ^ s
    | Add(e1, e2) -> "Add(" ^ string_of_exp e1 ^ ", " ^ string_of_exp e2 ^ ")"
    | Sub(e1, e2) -> "Sub(" ^ string_of_exp e1 ^ ", " ^ string_of_exp e2 ^ ")"
    | Mul(e1, e2) -> "Mul(" ^ string_of_exp e1 ^ ", " ^ string_of_exp e2 ^ ")"
    | Div(e1, e2) -> "Div(" ^ string_of_exp e1 ^ ", " ^ string_of_exp e2 ^ ")"
    | Mod(e1, e2) -> "Mod(" ^ string_of_exp e1 ^ ", " ^ string_of_exp e2 ^ ")"
    | Minus e -> "Minus(" ^ string_of_exp e ^ ")"
    | App(s, es) -> "App(" ^ s ^ ", [" ^  join "; " (List.map string_of_exp es) ^ "])"

let string_of_state = function
    | Let(u, s) -> "Let(" ^ u ^ ", " ^ s ^ ")"
    | Assign(s, e) -> "Assign(" ^ s ^ ", " ^ string_of_exp e ^ ")"
    | Return e -> "Return(" ^ string_of_exp e ^ ")"
    | Exp e -> "Exp(" ^ string_of_exp e ^ ")"

let string_of_funstate = function
    | Func(u, s, args, states) -> "Func(" ^ u ^ ", " ^ s ^ ", " ^ join "; " (List.map (fun (x, y) -> "(" ^ x ^ ", " ^ y ^ ")") args) ^ ", " ^ ("[" ^ join "; " (List.map string_of_state states) ^ "]") ^ ")"

base.ml

let rec join s = function
    | [] -> ""
    | [x] -> x
    | x :: xs -> x ^ s ^ join s xs

重要なのが以下の kNormal.ml というプログラムです。型 state では、例えば Add は変数としての文字列を三つ受け取っていますが、二つが計算用と一つが結果を代入するための変数です。元の式からこの state の list を作成する方法は、例えば平坦化の関数 flatten を用いて、 flatten Add(e1, e2) -> 二つの式の平坦化 flatten e1, flatten e2, 変数名の獲得 e1 -> s1, e2 -> s2, Add s3 s1 s2 というように構文木再帰的に平坦化します。

kNormal.ml

open Base

type state =
    | LoadInt of string * int
    | Add of string * string * string
    | Sub of string * string * string
    | Mul of string * string * string
    | Div of string * string * string
    | Mod of string * string * string
    | Minus of string * string
    | Mov of string * string
    | App of string * string * string list
    | Ret of string

type funstate =
    | Func of string * string list * state list

let debug_of_state = function
    | LoadInt(s, i) -> "LoadInt " ^ s ^ " " ^ string_of_int i
    | Add(s0, s1, s2) -> "Add " ^ join " " [s0; s1; s2]
    | Sub(s0, s1, s2) -> "Sub " ^ join " " [s0; s1; s2]
    | Mul(s0, s1, s2) -> "Mul " ^ join " " [s0; s1; s2]
    | Div(s0, s1, s2) -> "Div " ^ join " " [s0; s1; s2]
    | Mod(s0, s1, s2) -> "Mod " ^ join " " [s0; s1; s2]
    | Minus(s0, s1) -> "Minus " ^ s0 ^ " " ^ s1
    | Mov(s0, s1) -> "Mov " ^ s0 ^ " " ^ s1
    | App(s0, s1, xs) -> "App " ^ s0 ^ " " ^ s1 ^ "(" ^ join "," xs ^ ")"
    | Ret s -> "Ret " ^ s

let debug_of_funstate = function
    | Func(s, vs, xs) ->
        s ^ " " ^ "(" ^ join "," vs ^ "){\n" ^ join "\n" (List.map (fun x -> "  " ^ debug_of_state x) xs) ^ "\n}"

let make_variable i = "%" ^ string_of_int i

let rec flatten_exp i = function
    | Syntax.Int n ->
        let s = make_variable i in
        ([LoadInt(s, n)], s, i + 1)
    | Syntax.Add(e1, e2) ->
        let (l1, s1, i1) = flatten_exp i e1 in
        let (l2, s2, i2) = flatten_exp i1 e2 in
        let s = make_variable i2 in
        (l1 @ l2 @ [Add(s, s1, s2)], s, i2 + 1)
    | Syntax.Sub(e1, e2) ->
        let (l1, s1, i1) = flatten_exp i e1 in
        let (l2, s2, i2) = flatten_exp i1 e2 in
        let s = make_variable i2 in
        (l1 @ l2 @ [Sub(s, s1, s2)], s, i2 + 1)
    | Syntax.Mul(e1, e2) ->
        let (l1, s1, i1) = flatten_exp i e1 in
        let (l2, s2, i2) = flatten_exp i1 e2 in
        let s = make_variable i2 in
        (l1 @ l2 @ [Mul(s, s1, s2)], s, i2 + 1)
    | Syntax.Div(e1, e2) ->
        let (l1, s1, i1) = flatten_exp i e1 in
        let (l2, s2, i2) = flatten_exp i1 e2 in
        let s = make_variable i2 in
        (l1 @ l2 @ [Div(s, s1, s2)], s, i2 + 1)
    | Syntax.Mod(e1, e2) ->
        let (l1, s1, i1) = flatten_exp i e1 in
        let (l2, s2, i2) = flatten_exp i1 e2 in
        let s = make_variable i2 in
        (l1 @ l2 @ [Mod(s, s1, s2)], s, i2 + 1)
    | Syntax.Minus e ->
        let (l, s, j) = flatten_exp i e in
        let x = make_variable j in
        (l @ [Minus(x, s)], x, j + 1)
    | Syntax.Var s -> ([], s, i)
    | Syntax.App(s, es) ->
        let (l, vs, j) = flatten_exps i es in
        let x = make_variable j in
        (l @ [App(x, s, vs)], x, j + 1)
and flatten_exps i = function
    | [] -> ([], [], i)
    | e :: es ->
        let (l, s, j) = flatten_exp i e in
        let (ls, xs, k) = flatten_exps j es in
        (l @ ls, s :: xs, k)

let rec flatten_states i = function
    | [] -> []
    | Syntax.Let(t, s) :: xs -> flatten_states i xs
    | Syntax.Assign(s, e) :: xs ->
        let (l, v, j) = flatten_exp i e in
        l @ [Mov(s, v)] @ flatten_states j xs
    | Syntax.Return e :: xs ->
        let (l, v, j) = flatten_exp i e in
        l @ [Ret v] @ flatten_states j xs
    | Syntax.Exp e :: xs ->
        let (l, _, j) = flatten_exp i e in
        l @ flatten_states j xs

let read_funstate = function
    | Syntax.Func(t, v, args, states) -> Func(v, List.map snd args, flatten_states 0 states)

次のような入力を与えます。

Int main(){
    Int v;
    Int k;
    v = 1 + 2 - (3 % 4) + (5 + 6 * 7) / 8;
    k = v + 3 - 1;
    print(k * 2);
    return 0;
}

構文木と平坦化したプログラムは次のようになります。

Func(Int, main, , [Let(Int, v); Let(Int, k); Assign(v, Add(Sub(Add(Int 1, Int 2), Mod(Int 3, Int 4)), Div(Add(Int 5, Mul(Int 6, Int 7)), Int 8))); Assign(k, Sub(Add(Var v, Int 3), Int 1)); Exp(App(print, [Mul(Var k, Int 2)])); Return(Int 0)])
main (){
  LoadInt %0 1
  LoadInt %1 2
  Add %2 %0 %1
  LoadInt %3 3
  LoadInt %4 4
  Mod %5 %3 %4
  Sub %6 %2 %5
  LoadInt %7 5
  LoadInt %8 6
  LoadInt %9 7
  Mul %10 %8 %9
  Add %11 %7 %10
  LoadInt %12 8
  Div %13 %11 %12
  Add %14 %6 %13
  Mov v %14
  LoadInt %15 3
  Add %16 v %15
  LoadInt %17 1
  Sub %18 %16 %17
  Mov k %18
  LoadInt %19 2
  Mul %20 k %19
  App %21 print(%20)
  LoadInt %22 0
  Ret %22
}