Ocaml:递归函数中Ast类型的构建列表



我有一个手写的LL1解析器。我的AST并没有尽可能简化。语句部分如下:

type stmt_opt = StmtExpression of assignment | OptNil
[@@deriving show]
(*stmt_list -> stmt stmt_list | ε *)
type stmtlist =
| StmtList of stmt * stmtlist
| StmtlistNil 
[@@deriving show]
and stmt = 
| Assignment of assignment
| Return of stmt_opt 
| Parentheses of stmtlist
| If of assignment * stmt
| For of assignment * assignment * assignment * stmt
| While of assignment * stmt
(*“lparen” formals_opt “rparen” “LBRACE” vdecl_list stmt_list “RBRACE”*)
[@@deriving show]

正如你所看到的,我仍然掌握着很多不必要的信息。我想把我的声明建立成这样:

type stmt =
Block of stmt list
| Expr of expr
| Return of expr
| If of expr * stmt * stmt
| For of expr * expr * expr * stmt
| While of expr * stmt

我有点不知所措,因为我真的是根据书本构建了我的LL1解析器(我相信这些书预计不会有很长的语法(:每个非终结符都有一个解析方法,每个解析方法都返回一个标记登记和一个ast。

我认为,为了构建像我的目标语句AST中那样的Block类型,我需要在递归parseStmt方法中构建一个语句列表。我已经将我的解析器代码简化为只调用parseStmtList的解析器方法和它们调用parseStmtList 的特定实例

(*stmt_list = stmt stmt_list | epsilon*)
let rec parseStmtList tokenlist lst = 
match tokenlist.head with 
| Lexer.RightBrace -> (tokenlist, Ast.StmtlistNil )
| _ -> let (tokenlist_stmt, stmt) = parseStmt tokenlist in 
let new_lst = lst::stmt in
let (tokenlist_stmt_list, stmt_list) = tokenlist_stmt new_lst |> parseStmtList in
(tokenlist_stmt_list, Ast.Block(stmt_lst))
(*stmt -> assignment SEMI 
|  RETURN stmt_opt SEMI
|  LBRACE stmt_list RBRACE 
|  IF LPAREN assignment RPAREN stmt 
|  FOR LPAREN assignment SEMI assignment SEMI assignment RPAREN stmt  
|  WHILE LPAREN assignment RPAREN stmt
*)
and parseStmt tokenlist = 
begin
match tokenlist.head with 
| Lexer.ID identifier -> let (tokenlist_assignment, assignment) = parseAssignment tokenlist in
begin
match tokenlist_assignment.head with
| Lexer.Semicolon -> (next tokenlist_assignment, Ast.Assignment(assignment))
| _-> let err_msg = __LOC__ ^ "Syntax Error semicolon expected but received" ^ show_token_list tokenlist in
raise (Syntax_error err_msg) 
end
| Lexer.LeftBrace -> let tokenlist_leftbrace = next tokenlist in 
let (tokenlist_expr, expr) = parseStmtList tokenlist_leftbrace [] in
begin
match tokenlist_expr.head with
| Lexer.RightBrace -> (next tokenlist_expr, Ast.Parentheses(expr))
| _-> let err_msg = __LOC__ ^ "Syntax Error right brace expected but received" ^ show_token_list tokenlist in
raise (Syntax_error err_msg)
end
| _-> let err_msg = __LOC__ ^ "Syntax Error left brace expected but received" ^ show_token_list tokenlist in
raise (Syntax_error err_msg)
end

然而,我得到了错误:

Error: This expression has type 'a -> token_list * Ast.stmtlist
but an expression was expected of type 'b * 'c

对于parseStmtList中的线路let (tokenlist_stmt_list, stmt_list) = tokenlist_stmt new_lst |> parseStmtList in

tokenlist_stmt new_lst |> parseStmtList

这里,将tokenlist_stmt应用于参数new_lst,然后将parseStmtList应用于结果。但是tokenlist_stmt实际上并不是一个函数,所以这是一个类型错误。

假设您的意图是用tokenlist_stmtnew_lst作为其两个自变量来调用parseStmtList。语法很简单:

parseStmtList tokenlist_stmt new_lst

此外,lst::stmt也是一个类型错误,原因有两个:

  1. ::将列表作为其右操作数,而不是左操作数,因此它应该是stmt::lst
  2. lst实际上不是一个列表,它是一个Ast.Block,因为这是parseStmtList返回的内容

一旦你修复了所有这些,你会注意到列表将是错误的(大概这就是你最初尝试lst::stmt的原因,但你不能像那样附加到列表的末尾(。当使用累加器构建列表时,这是一个常见的问题。解决方案是,一旦构建完成,就反转列表,或者一开始就不使用累加器。


需要指出的一件事是,当使用Ast.stmtlist时,所有这些问题都会适用。也就是说,如果你的代码看起来像这样:

let new_lst = Ast.StmtList(lst, stmt) in
let (tokenlist_stmt_list, stmt_list) = tokenlist_stmt new_lst |> parseStmtList in
(tokenlist_stmt_list, Ast.Block(stmt_lst))

然后你会得到完全相同的错误。这让我觉得,你已经改变了比你需要的更多的代码。由于你的旧代码可能有效,我认为它看起来像这样:

let rec parseStmtList tokenlist = 
match tokenlist.head with 
| Lexer.RightBrace -> (tokenlist, Ast.StmtlistNil )
| _ -> let (tokenlist_stmt, stmt) = parseStmt tokenlist in 
let (tokenlist_stmt_list, stmt_list) = parseStmtList tokenlist_stmt in
(tokenlist_stmt_list, Ast.StmtList (stmt, stmt_lst))

然后在parseStmt中,您有:

let (tokenlist_stmtlist, stmtlist) = parseStmtList tokenlist_leftbrace in
begin
match tokenlist_expr.head with
| Lexer.RightBrace -> (next tokenlist_stmtlist, Ast.Block(stmtlist))

现在,在删除Ast.stmtlist之后,您只需要更改实际使用其构造函数的部分,并将这些部分替换为列表构造函数(::[](。因此,parseStmt中的代码将完全保持不变,parseStmtList中唯一的变化应该是替换行

| Lexer.RightBrace -> (tokenlist, Ast.StmtlistNil )

带有

| Lexer.RightBrace -> (tokenlist, [] )

和线路

(tokenlist_stmt_list, Ast.StmtList (stmt, stmt_lst))

带有

(tokenlist_stmt_list, stmt :: stmt_lst)

如果您的旧代码看起来与我上面提出的不同,您可能需要更改不同的行,但想法保持不变:用::替换Ast.StmtList,用[]替换Ast.StmtListNil

就这样。这就是所有必要的改变。你太复杂了。

最新更新