Recursive descent parser for a BNF grammar in Java



I need to write a recursive descent parser for the following grammar:

<program> → <statements>
<statements> → <statement> | <statement><semi_colon><statements>
<statement> → <ident><assignment_op><expression>
<expression> → <term><term_tail>
<term_tail> → <add_op><term><term_tail> | ε
<term> → <factor> <factor_tail>
<factor_tail> → <mult_op><factor><factor_tail> | ε
<factor> → <left_paren><expression><right_paren> | <ident> | <const>
<const> → any decimal numbers
<ident> → any names conforming to C identifier rules
<assignment_op> → :=
<semi_colon> → ;
<add_op> → + | -
<mult_op> → * | /
<left_paren> → (
<right_paren> → )
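
The grammar groups * and / under <term> and + and - under <term_tail>, so multiplication binds tighter than addition. For example, 1 + 2 * 3 can be derived as:

<expression> → <term><term_tail>
             → <factor><factor_tail><term_tail>
             → 1 <term_tail>                      (<factor> → <const>, <factor_tail> → ε)
             → 1 + <term><term_tail>              (<term_tail> → <add_op><term><term_tail>)
             → 1 + <factor><factor_tail>          (trailing <term_tail> → ε)
             → 1 + 2 * <factor><factor_tail>      (<factor_tail> → <mult_op><factor><factor_tail>)
             → 1 + 2 * 3                          (<factor> → <const>, <factor_tail> → ε)

so 2 * 3 is reduced to a single <term> before the addition is applied.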

I have written a lexical analyzer that turns the input into tokens, each carrying a token type, a token string, and a token value.

public class Token {
    int type;            // one of the TokenType constants
    String token_string; // the matched text, e.g. "operator1" or "100"
    Object value;
    public Token(int type, String token_string, Object value) {
        this.type = type;
        this.token_string = token_string;
        this.value = value;
    }
}
public class TokenType {
    public static final int NUMBER = 1;
    public static final int LEFT_PAR = 2;
    public static final int RIGHT_PAR = 3;
    public static final int PLUS = 4;
    public static final int MINUS = 5;
    public static final int STAR = 6;
    public static final int SLASH = 7;
    public static final int IDENT = 8;
    public static final int ASSIGN = 9;
    public static final int SEMI_COLON = 10;
}

I should be able to parse input like

operator1 := 100 * (100 - 200);
operator2 := operator1 + 300

and have it return the values of operator1 and operator2.
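
For the first statement, my lexer produces a token stream roughly like (token strings in parentheses where they matter):

IDENT("operator1")  ASSIGN  NUMBER("100")  STAR  LEFT_PAR  NUMBER("100")  MINUS  NUMBER("200")  RIGHT_PAR  SEMI_COLON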

However, I am not sure how to write the parser.

First, add an END_OF_TOKEN constant to TokenType so the parser can tell when the token stream has been exhausted:

public class TokenType {
    public static final int NUMBER = 1;
    .....
    public static final int END_OF_TOKEN = 11;
}

Next, define a Tokenizer class. Each call to get() returns the next Token; for simplicity it just hands back the tokens passed to its constructor, and an END_OF_TOKEN sentinel once they run out:

public class Tokenizer {
    Token[] tokens;
    int index = 0;

    Tokenizer(Token... tokens) {
        this.tokens = tokens;
    }

    // Returns the next token, or an END_OF_TOKEN sentinel once the array is exhausted.
    public Token get() {
        if (index < tokens.length)
            return tokens[index++];
        else
            return new Token(TokenType.END_OF_TOKEN, "End of token", null);
    }
}
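
In real use you will probably want to build that token array from a source string. Below is only a rough sketch of such a scanner; this Lexer class is my own addition, not part of your code, and it just reuses the Token and TokenType classes above:

import java.util.ArrayList;
import java.util.List;

public class Lexer {
    public static List<Token> tokenize(String input) {
        List<Token> tokens = new ArrayList<>();
        int i = 0;
        while (i < input.length()) {
            char c = input.charAt(i);
            if (Character.isWhitespace(c)) {
                i++;                                        // skip spaces and newlines
            } else if (Character.isDigit(c)) {              // <const> → any decimal numbers
                int start = i;
                while (i < input.length() && (Character.isDigit(input.charAt(i)) || input.charAt(i) == '.'))
                    i++;
                tokens.add(new Token(TokenType.NUMBER, input.substring(start, i), null));
            } else if (Character.isLetter(c) || c == '_') { // <ident> → C identifier rules
                int start = i;
                while (i < input.length() && (Character.isLetterOrDigit(input.charAt(i)) || input.charAt(i) == '_'))
                    i++;
                tokens.add(new Token(TokenType.IDENT, input.substring(start, i), null));
            } else if (c == ':' && i + 1 < input.length() && input.charAt(i + 1) == '=') {
                tokens.add(new Token(TokenType.ASSIGN, ":=", null));
                i += 2;
            } else {                                        // single-character tokens
                int type;
                switch (c) {
                    case '(': type = TokenType.LEFT_PAR; break;
                    case ')': type = TokenType.RIGHT_PAR; break;
                    case '+': type = TokenType.PLUS; break;
                    case '-': type = TokenType.MINUS; break;
                    case '*': type = TokenType.STAR; break;
                    case '/': type = TokenType.SLASH; break;
                    case ';': type = TokenType.SEMI_COLON; break;
                    default: throw new RuntimeException("unexpected character '" + c + "'");
                }
                tokens.add(new Token(type, String.valueOf(c), null));
                i++;
            }
        }
        return tokens;
    }
}

The resulting list can be passed to the Tokenizer above with tokens.toArray(new Token[0]).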

Then define the Parser. Each nonterminal of the grammar becomes a method; the recursive <term_tail> and <factor_tail> productions are implemented as while loops inside expression() and term(), and assigned variables are stored in a Map so later statements can refer to earlier ones:

import java.util.HashMap;
import java.util.Map;

public class Parser {
    Tokenizer tokenizer;
    Token token; // one-token lookahead
    Map<String, Double> variables = new HashMap<>();

    Parser(Tokenizer tokenizer) {
        this.tokenizer = tokenizer;
    }

    // <factor> → <left_paren><expression><right_paren> | <ident> | <const>
    double factor() {
        if (token.type == TokenType.LEFT_PAR) {
            token = tokenizer.get();
            double e = expression();
            if (token.type != TokenType.RIGHT_PAR)
                throw new RuntimeException("')' expected");
            token = tokenizer.get();
            return e;
        } else if (token.type == TokenType.IDENT) {
            String name = token.token_string;
            token = tokenizer.get();
            if (!variables.containsKey(name))
                throw new RuntimeException("variable '" + name + "' undefined");
            return variables.get(name);
        } else if (token.type == TokenType.NUMBER) {
            double value = Double.parseDouble(token.token_string);
            token = tokenizer.get();
            return value;
        } else
            throw new RuntimeException("unknown token '" + token.token_string + "'");
    }

    // <term> → <factor><factor_tail>; the recursive <factor_tail> becomes a loop
    double term() {
        double value = factor();
        while (true) {
            if (token.type == TokenType.STAR) {
                token = tokenizer.get();
                value *= factor();
            } else if (token.type == TokenType.SLASH) {
                token = tokenizer.get();
                value /= factor();
            } else
                break;
        }
        return value;
    }

    // <expression> → <term><term_tail>; the recursive <term_tail> becomes a loop
    double expression() {
        double value = term();
        while (true) {
            if (token.type == TokenType.PLUS) {
                token = tokenizer.get();
                value += term();
            } else if (token.type == TokenType.MINUS) {
                token = tokenizer.get();
                value -= term();
            } else
                break;
        }
        return value;
    }

    // <statement> → <ident><assignment_op><expression>
    void statement() {
        if (token.type != TokenType.IDENT)
            throw new RuntimeException("identifier expected");
        String name = token.token_string;
        token = tokenizer.get();
        if (token.type != TokenType.ASSIGN)
            throw new RuntimeException("':=' expected");
        token = tokenizer.get();
        double value = expression();
        variables.put(name, value);
    }

    // <statements> → <statement> | <statement><semi_colon><statements>
    void statements() {
        while (true) {
            statement();
            if (token.type != TokenType.SEMI_COLON)
                break;
            token = tokenizer.get();
        }
    }

    // <program> → <statements>
    public Map<String, Double> parse() {
        token = tokenizer.get();
        statements();
        if (token.type != TokenType.END_OF_TOKEN)
            throw new RuntimeException("extra token '" + token.token_string + "'");
        return variables;
    }
}

Test:

@Test
public void testParser() {
    // Token stream for: operator1 := 200 + 100 * (100 - 200); operator2 := operator1 + 300
    Tokenizer tokenizer = new Tokenizer(
            new Token(TokenType.IDENT, "operator1", null),
            new Token(TokenType.ASSIGN, null, null),
            new Token(TokenType.NUMBER, "200", null),
            new Token(TokenType.PLUS, null, null),
            new Token(TokenType.NUMBER, "100", null),
            new Token(TokenType.STAR, null, null),
            new Token(TokenType.LEFT_PAR, null, null),
            new Token(TokenType.NUMBER, "100", null),
            new Token(TokenType.MINUS, null, null),
            new Token(TokenType.NUMBER, "200", null),
            new Token(TokenType.RIGHT_PAR, null, null),
            new Token(TokenType.SEMI_COLON, null, null),
            new Token(TokenType.IDENT, "operator2", null),
            new Token(TokenType.ASSIGN, null, null),
            new Token(TokenType.IDENT, "operator1", null),
            new Token(TokenType.PLUS, null, null),
            new Token(TokenType.NUMBER, "300", null));
    Parser parser = new Parser(tokenizer);
    Map<String, Double> variables = parser.parse();
    System.out.println(variables);
}

Output:

{operator2=-9500.0, operator1=-9800.0}
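
Putting it together with the Lexer sketch above (again, that class is only an illustration, not part of your code), the two statements from your question could be evaluated end to end roughly like this:

import java.util.List;

public class Main {
    public static void main(String[] args) {
        List<Token> toks = Lexer.tokenize(
                "operator1 := 100 * (100 - 200); operator2 := operator1 + 300");
        Parser parser = new Parser(new Tokenizer(toks.toArray(new Token[0])));
        // Prints something like {operator1=-10000.0, operator2=-9700.0}; HashMap iteration order is not guaranteed.
        System.out.println(parser.parse());
    }
}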
