代码之家  ›  专栏  ›  技术社区  ›  maddie

OCaml:如何在没有堆栈的LL解析期间构造AST

  •  0
  • maddie  · 技术社区  · 6 年前

    A parseA parse方法返回一个token列表中的token和。

    我不知道在解析器中调用哪个AST方法。有没有一个通用的方法来解决这个问题?

    这是我的尝试:

    expr -> t eprime 
    eprime -> PLUS t eprime | MINUS t eprime | ε
    t -> t tprime
    tprime -> TIMES f tprime | DIVIDE f tprime | ε
    f -> LPAREN expr RPAREN | LITERAL | TRUE | FALSE | ID
    

    我有四个解析方法,每个非终结符对应一个。

    let parseExpr tokenlist =
        match tokenlist.head with 
        | "LPAREN" -> let t_expr tokenlist_t = next tokenlist |> parseExpr in 
                      let e_expr tokenlist_e = parseEPrime tokenlist_t in
                      (tokenlist_e, Ast.Expression(t_expr, e_expr))
        | "LITERAL" -> let t_expr tokenlist_t = next tokenlist |> parseExpr in 
                      let e_expr tokenlist_e = parseEPrime tokenlist_t in
                      (tokenlist_e, Ast.Expression(t_expr, e_expr))
        | "TRUE" -> let t_expr tokenlist_t = next tokenlist |> parseExpr in 
                      let e_expr tokenlist_e = parseEPrime tokenlist_t in
                      (tokenlist_e, Ast.Expression(t_expr, e_expr))
        | "FALSE" -> let t_expr tokenlist_t = next tokenlist |> parseExpr in 
                      let e_expr tokenlist_e = parseEPrime tokenlist_t in
                      (tokenlist_e, Ast.Expression(t_expr, e_expr))
        | "ID" -> let t_expr tokenlist_t = next tokenlist |> parseExpr in 
                      let e_expr tokenlist_e = parseEPrime tokenlist_t in
                      (tokenlist_e, Ast.Expression(t_expr, e_expr))
    
    
    let parseEPrime tokenlist =
      match tokenlist with
       | "PLUS" -> let expr_t tokenlist_t = next tokenlist |> parseT in
                    let expr_eprime tokenlist_e = parseEPrime tokenlist_t in 
                    (tokenlist_e, Ast.Add(expr_t, expr_eprime))
       | "MINUS" -> let expr_t tokenlist_t = next tokenlist |> parseT in
                    let expr_eprime tokenlist_e = parseEPrime tokenlist_t in 
                    (tokenlist_e, Ast.Minus(expr_t, expr_eprime))
       | "SEMI" -> (tokenlist, [])
       | "RPAREN" -> (tokenlist, [])
       | _ -> raise error  
    
    
    let parseT tokenlist = 
      match tokenlist.lookathead with 
      | "LPAREN" -> let expr_f tokenlist_f = parseF tokenlist in 
                    let expr_tprime tokenlist_tprime = parseTprime tokenlist_f in 
                    (tokenlist_tprime, Ast.F(expr_f, expr_tprime))
      | "LITERAL" -> let expr_f tokenlist_f = parseF tokenlist in 
                    let expr_tprime tokenlist_tprime = parseTprime tokenlist_f in 
                    (tokenlist_tprime, Ast.Literal(expr_f, expr_tprime))
      | "TRUE" -> let expr_f tokenlist_f = parseF tokenlist in 
                    let expr_tprime tokenlist_tprime = parseTprime tokenlist_f in 
                    (tokenlist_tprime, Ast.F(expr_f, expr_tprime))
      | "FALSE" -> let expr_f tokenlist_f = parseF tokenlist in 
                    let expr_tprime tokenlist_tprime = parseTprime tokenlist_f in 
                    (tokenlist_tprime, Ast.F(expr_f, expr_tprime))
      | "ID" -> let expr_f tokenlist_f = parseF tokenlist in 
                    let expr_tprime tokenlist_tprime = parseTprime tokenlist_f in 
                    (tokenlist_tprime, Ast.F(expr_f, expr_tprime))
      | _-> raise error
    
    let parseTprime tokenlist = 
      match  tokenlist.lookathead with
      | "TIMES" -> let expr_f tokenlist_f = next tokenlist |> parseF in 
                    let expr_tprime tokenlist_tprime = parseTPrime tokenlist_f in 
                    (tokenlist_tprime, Ast.Times(expr_f, expr_tprime))
      | "DIVIDE" -> let expr_f tokenlist_f = next tokenlist |> parseF in 
                    let expr_tprime tokenlist_tprime = parseTPrime tokenlist_f in 
                    (tokenlist_tprime, Ast.Divide(expr_f, expr_tprime))
      | "PLUS" -> (tokenlist, [])
      | "MINUS" -> (tokenlist, [])
      | "SEMI" -> (tokenlist, [])
      | "RPAREN" -> (tokenlist, [])
      | _ -> raise error  
    
    let parseF tokenlist = 
      match tokenlist.lookathead with
      | "LPAREN" -> let expr tokenlist_expr = next tokenlist |> parseE in 
                    match next tokenlist_expr with 
                    | "RPAREN" -> (next tokenlist_expr, Ast.ExpressionParen(expr))
      | "LITERAL" -> (next tokenlist, Ast.FLiteral)
      | "TRUE" -> (next tokenlist, Ast.BoolLit)
      | "FALSE" -> (next tokenlist, Ast.FBool)
      | "ID" -> (next tokenlist, Ast.Id)
      | _ -> raise error 
    

    (*expr -> T E* *)
    type expr = 
    | Expression of t eprime 
    
    
    (*T -> F T*)
    type t = 
    | F of f * tprime
    
    (*E* -> + T E* 
    E* -> - T E* 
    E* -> ε  *)
    type eprime = 
    | Add of t eprime
    | Minus of t eprime
    | Eempty
    
    
    (*T* -> TIMES F T* 
    T* -> / F T* 
    T* -> ε*)
    type tprime = 
    | Divide of f * tprime 
    | Times of f * tprime
    | TEmpty
    
    (*F -> LPAREN E RPAREN 
    F -> Literal 
    F -> TRUE 
    F -> FALSE
    F -> ID*)
    type f = 
    | ExpressionParen of expr
    | Literal of int 
    | BoolLit of bool 
    | Id of string
    

    但我不知道我的方法保留了太多不必要的信息,而不是AST通常会抖掉的信息(我想象AST是一个解析树,它会抖掉并去掉不必要的叶子)。到目前为止,我只留下了括号和分号。我恐怕我吃了太多了 type t, type f, type tprime, type eprime 在我看来。但是如果我把它们去掉,我就不知道怎么写 type expr 在我看来。

    2 回复  |  直到 6 年前
        1
  •  1
  •   sepp2k    6 年前

    给定一个定义如下的AST:

    type expr =
      | Add of expr * expr
      | Minus of expr * expr
      | Times of expr * expr
      | Divide of expr * expr
      | IntLit of int 
      | BoolLit of bool 
      | Id of string
    

    Prime 函数将左操作数作为参数,如下所示:

    let parseExpr tokens =
      let (lhs, remainingTokens) = parseT tokens in
      parseExprPrime lhs remainingTokens
    
    let parseExprPrime lhs tokens = match tokenlist.lookahead with
    | PLUS :: tokens ->
      let (rhs, remainingTokens) = parseT (next tokens) in
      parseExprPrime (Add (lhs, rhs)) remainingTokens
    | MINUS :: tokens ->
      let (rhs, remainingTokens) = parseT (next tokens) in
      parseExprPrime (Minus (lhs, rhs)) remainingTokens
    | tokens ->
      lhs, tokens
    

    parseT parseTPrime parseF Ast.ExpressionParen(expr) 只是 expr 因为我也移除了 ExpressionParen

    请注意,这里没有必要区分合法令牌和非法令牌。回来没关系 lhs, tokens 两者都是合法的象征,比如 ; ) tokens 以非法令牌开始,该令牌将被 帕塞夫 ,因此无需在此处进行检查。同样的代码也不需要重复四次,所以您只需调用 帕塞特 parseExprPrime 即使不看当前的令牌,这些函数也会处理它。


    eval: expr -> int 作为一个案例研究(让我们忽略 BoolLit Id 为此目的)。使用原始定义,它将如下所示:

    let rec eval = function
    | Expression (lhs, eprime) -> evalEPrime (evalT lhs) eprime
    
    and evalEPrime lhsValue = function
    | Add (rhs, rest) -> evalEPrime (lhsValue + evalT rhs) rest
    | Minus (rhs, rest) -> evalEPrime (lhsValue - evalT rhs) rest
    | Eempty -> lhsValue
    
    and evalT = function
    | T (lhs, tprime) -> evalTPrime (evalF lhs) tprime
    
    and evalTPrime lhsValue = function
    | Times (rhs, rest) -> evalTPrime (lhsValue * evalF rhs) rest
    | Divide (rhs, rest) -> evalTPrime (lhsValue / evalF rhs) rest
    | TEmpty -> lhsValue
    
    and evalF = function
    | ExpressionParen expr -> eval expr
    | IntLit i -> i
    

    使用简化定义,它将改为:

    let rec eval = function
    | Add (lhs, rhs) -> eval lhs + eval rhs
    | Minus (lhs, rhs) -> eval lhs - eval rhs
    | Times (lhs, rhs) -> eval lhs * eval rhs
    | Divide (lhs, rhs) -> eval lhs / eval rhs
    | IntLit i -> i
    

        2
  •  0
  •   Jeffrey Scofield    6 年前

    如果每个非终结符都有一个类型,那么最终得到的树更具体(类似于解析树),而不是抽象树。

    我不知道这有多糟糕,它仍然是代码的良好表示。

    从一个角度来看,你的语法是如此简单和精简,以至于没有太多需要省略的偶然标点符号来使树更加抽象。