我正在尝试用flex/bison构建一种类似Ruby的小型编程语言。如果全部用C编写,flex和bison可以很好地协同工作。但当我需要用C++为非终结符(expr、stmt等)构建类时,问题就出现了。
mRuby.l:
%option yylineno
%{
#include "absyn.h"
#include "mRuby.tab.h"
/* Scanner position counters. In the rules of this scanner col_nr is only
   ever reset (on newline) and line_nr is not referenced; yylineno is what
   the error report uses for the line. */
int line_nr = 1;
int col_nr = 1;
/*
col_nr += strlen(atoi(yytext));
*/
%}
/* Identifiers: a letter or underscore followed by letters, digits or
   underscores. The former range [A-z] also matched the punctuation that
   sits between 'Z' and 'a' in ASCII ('[', '\', ']', '^' and '`'). */
identifier [A-Za-z_][A-Za-z0-9_]*
/* Integers: optional sign, digits, single underscores allowed between digits.
   NOTE(review): the optional leading '-' makes "a-1" scan as IDENTIFIER
   INTEGER instead of a subtraction -- confirm this is intended. */
integer -?([0-9])+(_?[0-9])*
/* A comment runs from '#' through the end of the line, newline included. */
comment ("#".*"\n")
/* Runs of blanks and tabs. */
whitespace (" "|"\t")+
boolean (true|false)
/* Line terminators (CRLF, LF or CR); not referenced by the rules below. */
CR (\r\n)*|(\n)*|(\r)*
%%
";" { return SEMICOLON; }
"undef" { return UNDEF; }
"def" { return DEF; }
"(" { return LPAREN; }
")" { return RPAREN; }
"end" { return END;}
"return" { return RETURN;}
"if" { return IF; }
"then" { return THEN; }
"elsif" { return ELSIF;}
"else" { return ELSE; }
"unless" { return UNLESS; }
"while" { return WHILE; }
"do" { return DO; }
"until" { return UNTIL; }
"case" { return CASE; }
"when" { return WHEN; }
"," { return COMMA; }
"=" { return ASSIGN; }
"+=" { return PLUSASSIGN; }
"-=" { return MINUSASSIGN; }
"*=" { return MULASSIGN; }
"/=" { return DIVASSIGN; }
"&&=" { return ANDASSIGN; }
"||=" { return ORASSIGN; }
"+" { return PLUS; }
"-" { return MINUS; }
"*" { return MUL; }
"/" { return DIV; }
">" { return GT; }
">=" { return GE; }
"<" { return LT; }
"<=" { return LE; }
"==" { return EQ; }
"!=" { return NE; }
"&&" { return AND; }
"||" { return OR; }
"!" { return NOT; }
"\n" {
    /* A newline ends a statement exactly like ';' and restarts the column. */
    col_nr = 1;
    return SEMICOLON;
}
{boolean} {
    /* NOTE(review): the token is returned without a semantic value. */
    return BOOLEAN;
}
{comment}|{whitespace} { /* do nothing */ }
{integer} {
    /* NOTE(review): the token is returned without a semantic value. */
    return INTEGER;
}
{identifier} {
    /* Give the parser its own heap copy of the name: yytext is reused by
       the scanner for the next token. The copy used to be dropped on the
       floor (leaked) and yylval was never set. */
    char* s = (char*) malloc(yyleng + 1);
    if (s == NULL) {
        fprintf(stderr, "out of memory\n");
        exit(EXIT_FAILURE);
    }
    strcpy(s, yytext);
    yylval.id = s;
    return IDENTIFIER;
}
. {
    /* Any other single character is illegal. Classify it through an
       unsigned copy so that bytes above 0x7F are not seen as negative
       (and therefore as control characters) where char is signed. */
    unsigned char c = (unsigned char) yytext[0];
    if (c < ' ') {
        /* control character, shown in caret notation (e.g. ^A) */
        fprintf(stderr, "illegal character: ^%c", c + '@');
    }
    else if (c > '~') {
        /* non-printable character, shown as a backslash followed by a
           zero-padded three-digit octal value */
        fprintf(stderr, "illegal character: \\%03o", (unsigned int) c);
    }
    else {
        fprintf(stderr, "illegal character: %s", yytext);
    }
    /* Exactly one character was read: the illegal one.
       NOTE(review): no rule in this section advances col_nr, so the
       reported column does not track the real position. */
    fprintf(stderr, " at line %d column %d\n", yylineno, (col_nr - 1));
}
%%
/* flex calls yywrap() when it reaches end of input. A non-zero result ends
   scanning; zero would tell the scanner to carry on. This implementation
   always asks the scanner to stop. */
int yywrap(){
    const int stop_scanning = 1;
    return stop_scanning;
}
我的Bison文件:
%{
#include "lexer.h"
#include "absyn.h"
#include <iostream>
void yyerror(const char* str);
// Program entry point: prints a banner, runs the parser once, prints a
// trailer, and reports the parser's status as the process exit code
// instead of always returning 0.
int main(int argc, char* argv[]){
    std::cout << "Hello world! \n";
    const int status = yyparse();
    std::cout << "TEST \n";
    return status;
}
%}
// Semantic value type shared by the scanner and the parser.
// NOTE(review): the member types below come from absyn.h, which is not
// visible here. If they are class types with non-trivial constructors or
// destructors they cannot be plain union members; storing pointers to the
// AST nodes is the usual approach -- confirm against absyn.h.
%union {
int g;
char* id;
char* b;
Stmts stmts;
Stmt stmt;
CaseStmt casestmt;
WhenStmt whenstmt;
IfStmt ifstmt;
ElifStmt elifstmt;
Expr expr;
Exprs exprs;
ArgList arglist;
ArgLists arglists;
Ids ids;
T t;
Assignop assignop;
Binop binop;
}
// token declarations
%token
SEMICOLON UNDEF DEF LPAREN RPAREN END RETURN INTEGER
IF THEN ELSIF ELSE UNLESS WHILE DO UNTIL CASE WHEN COMMA
ASSIGN PLUSASSIGN MINUSASSIGN MULASSIGN DIVASSIGN ANDASSIGN ORASSIGN
PLUS MINUS MUL DIV GT GE LT LE EQ NE AND OR NOT
%token <id> IDENTIFIER
%token <b> BOOLEAN
// Semantic value tags of the nonterminals. Every tag must name a %union
// member; <whenstm> and <elifstm> did not.
%type <stmts> stmts
%type <stmt> stmt
%type <casestmt> casestmt
%type <whenstmt> whenstmt
%type <ifstmt> ifstmt
%type <elifstmt> elifstmt
%type <expr> expr
%type <exprs> exprs
%type <arglist> arglist
%type <arglists> arglists
%type <ids> ids
%type <t> t
%type <assignop> assignop
%type <binop> binop
// NOTE(review): PLUS is given a <binop> value here, so whatever returns the
// PLUS token has to fill in yylval.binop before the parser reads it.
%type <binop> PLUS
// Precedence declarations, lowest precedence first.
%nonassoc operation
%nonassoc expression
// Assignment binds loosest, so "a = b + c" groups as "a = (b + c)".
%right ASSOP
%nonassoc OR NE EQ LT LE GT GE AND
%left PLUS MINUS
// The multiplicative tokens are MUL and DIV (see %token above); the former
// TIMES/DIVIDES names left MUL and DIV without any precedence.
%left MUL DIV
%right UNOT
%right UMINUS
//%defines
%%
// Productions.
// Every action in this group only prints a trace label to std::cout when
// its rule is reduced; none of them assigns $$.

// Start symbol: a whole program is one compound statement.
program : compstmt { std::cout << "program 0"; }
;
// A statement list, optionally followed by one trailing terminator t.
compstmt : stmts { std::cout << " compstmt 1"; }
| stmts t { std::cout << " compstmt 2"; }
;
// One or more statements separated by the terminator t (left-recursive).
stmts : stmt { std::cout << " stmts 1"; }
| stmts t stmt { std::cout << " stmts 2 "; }
;
// A single statement. The last alternative uses bison's predefined `error`
// token so that a syntax error can be reduced to a stmt.
stmt : undefstmt { std::cout << " stmt 1"; }
| expr { std::cout << " stmt 2"; }
| defstmt { std::cout << " stmt 3"; }
| returnstmt { std::cout << "stmt 4"; }
| ifstmt { std::cout << "stmt 5"; }
| whilestmt { std::cout << " stmt 6"; }
| untilstmt { std::cout << " stmt 7"; }
| unlessstmt { std::cout << " stmt 8"; }
| casestmt { std::cout << " stmt 9"; }
| error { std::cout << " error"; }
;
// UNDEF followed by a name.
undefstmt : UNDEF IDENTIFIER { std::cout << " undefstmt"; }
;
// DEF name ( arglists ) body END. arglists cannot be empty, so a
// definition with "()" is not accepted by this rule.
defstmt : DEF IDENTIFIER LPAREN arglists RPAREN compstmt END { std::cout << " defstmt"; }
;
// RETURN followed by an expression.
returnstmt : RETURN expr { std::cout << " returnstmt"; }
;
// WHILE condition DO body END.
whilestmt : WHILE expr DO compstmt END { std::cout << " whilestmt"; }
;
// UNTIL condition DO body END.
untilstmt : UNTIL expr DO compstmt END { std::cout << " untilstmt"; }
;
// UNLESS condition THEN body END, with an optional ELSE body.
unlessstmt : UNLESS expr THEN compstmt END { std::cout << " unless 1"; }
| UNLESS expr THEN compstmt ELSE compstmt END { std::cout << " unless 2"; }
;
// CASE expr with a mandatory first WHEN branch, optionally followed by
// further WHEN branches (whenstmt) and/or an ELSE body, closed by END.
// All four alternatives print the same trace label.
casestmt : CASE expr WHEN expr THEN compstmt END { std::cout << " casestmt "; }
| CASE expr WHEN expr THEN compstmt ELSE compstmt END { std::cout << " casestmt "; }
| CASE expr WHEN expr THEN compstmt whenstmt END { std::cout << " casestmt "; }
| CASE expr WHEN expr THEN compstmt whenstmt ELSE compstmt END { std::cout << " casestmt "; }
;
// One or more additional "WHEN expr THEN body" branches (left-recursive).
whenstmt : WHEN expr THEN compstmt { std::cout << " whenstmt "; }
| whenstmt WHEN expr THEN compstmt { std::cout << " whenstmt "; }
;
// IF condition THEN body, optionally followed by ELSIF branches (elifstmt)
// and/or an ELSE body, closed by END. All alternatives print the same label.
ifstmt : IF expr THEN compstmt END { std::cout << "ifstmt"; }
| IF expr THEN compstmt ELSE compstmt END { std::cout << "ifstmt"; }
| IF expr THEN compstmt elifstmt END { std::cout << "ifstmt"; }
| IF expr THEN compstmt elifstmt ELSE compstmt END { std::cout << "ifstmt"; }
;
// One or more "ELSIF condition THEN body" branches (left-recursive).
elifstmt : ELSIF expr THEN compstmt { std::cout << " elifstmt "; }
| elifstmt ELSIF expr THEN compstmt { std::cout << " elifstmt "; }
;
// Expressions: a name, an assignment to a name, logical negation, a boolean
// literal, arithmetic negation, a call without or with arguments, a binary
// operation, or an integer literal. The actions only print trace labels.
// The INTEGER alternative is new: the scanner returns INTEGER tokens, but no
// production accepted them, so every integer literal was a syntax error.
// NOTE(review): because the operator of "expr binop expr" is hidden behind
// the nonterminal binop, that rule has no precedence of its own and the
// operator precedence declarations cannot disambiguate it.
expr : IDENTIFIER { std::cout << " expr 1"; }
| IDENTIFIER assignop expr %prec ASSOP { std::cout << " expr 2"; }
| NOT expr %prec UNOT { std::cout << " expr 3"; }
| BOOLEAN { std::cout << " expr 4"; }
| MINUS expr %prec UMINUS { std::cout << " expr 5"; }
| IDENTIFIER LPAREN RPAREN { std::cout << " expr 6"; }
| IDENTIFIER LPAREN exprs RPAREN { std::cout << " expr 7"; }
| expr binop expr { std::cout << " expr 8"; }
| INTEGER { std::cout << " expr 9"; }
;
// One or more expressions separated by COMMA (left-recursive); used for
// call arguments. Actions in this group only print trace labels.
exprs : expr { std::cout << " exprs "; }
| exprs COMMA expr { std::cout << " exprs "; }
;
// One or more arglist items written one after another, with no separator
// token between them.
arglists : arglist { std::cout << " arglists "; }
| arglists arglist { std::cout << " arglists "; }
;
// A name, optionally followed by an ids tail.
arglist : IDENTIFIER { std::cout << " arglist "; }
| IDENTIFIER ids { std::cout << " arglist "; }
;
// One or more "SEMICOLON IDENTIFIER" pairs (right-recursive).
// NOTE(review): the separator here is SEMICOLON, not COMMA -- confirm
// that this is the intended parameter syntax.
ids : SEMICOLON IDENTIFIER { std::cout << " ids "; }
| SEMICOLON IDENTIFIER ids { std::cout << " ids "; }
;
// Statement terminator: a single SEMICOLON token.
t : SEMICOLON { std::cout << " t "; }
;
// Assignment operators. ANDASSIGN and ORASSIGN have empty actions and so
// print nothing, unlike the other alternatives.
assignop : ASSIGN { std::cout << "assop" ; }
| PLUSASSIGN { std::cout << "assop" ; }
| MINUSASSIGN { std::cout << "assop" ; }
| MULASSIGN { std::cout << "assop" ; }
| DIVASSIGN { std::cout << "assop" ; }
| ANDASSIGN { }
| ORASSIGN { }
;
// Binary operators. PLUS forwards the token's semantic value as the value of
// binop; every other alternative only prints a trace line and leaves $$
// unset. The trace strings end in a real newline again (the escape had been
// reduced to a literal 'n').
binop : PLUS { $$ = $1; }
| MINUS { std::cout << "expr MINUS expr\n"; }
| MUL { std::cout << "expr MUL expr\n"; }
| DIV { std::cout << "expr DIV expr\n"; }
| LE { std::cout << "expr LE expr\n"; }
| LT { std::cout << "expr LT expr\n"; }
| GE { std::cout << "expr GE expr\n"; }
| GT { std::cout << "expr GT expr\n"; }
| EQ { std::cout << "expr EQ expr\n"; }
| NE { std::cout << "expr NE expr\n"; }
| AND { std::cout << "expr AND expr\n"; }
| OR { std::cout << "expr OR expr\n"; }
;
%%
// Error callback invoked by the generated parser.
// @param s  diagnostic text supplied by the parser (e.g. "syntax error").
// Writes the message and a newline to standard error; the body used to be
// empty, which silently discarded every parser diagnostic.
void yyerror (const char *s)
{
    std::cerr << (s != nullptr ? s : "unknown parse error") << '\n';
}
我尝试了多种include组合和编译顺序。我最近一次尝试编译全部内容的方式如下:
# Build sequence as attempted in the question.
# Run bison on the grammar under its .yy name.
bison mRuby.yy
# Copy the grammar to a .y name and run bison again, now with -d.
cp -R mRuby.yy mRuby.y
bison -d mRuby.y
# Generate the scanner source (lex.yy.c is the name used below).
flex mRuby.l
# Compile-only step with the C driver; the grammar's prologue includes
# <iostream>, so mRuby.tab.c is C++ code. NOTE(review): -ll/-ly are link
# libraries and presumably have no effect together with -c.
gcc -c lex.yy.c mRuby.tab.c -ll -ly
# NOTE(review): -c with an already compiled object file as the only input
# presumably does nothing useful.
g++ lex.yy.o -c
# Builds the executable from the .cc parser only; the scanner is not on this
# command line.
g++ mRuby.tab.cc -o parser
我的目标是用类的构造代码替换这些C++打印语句,以便用C++构建语法分析树和解释器。
Bison会根据输入文件的后缀来决定生成文件的后缀,详见Bison手册第9节。
如果头文件是以#include "mRuby.tab.h"
的形式包含的,那么您的Bison文件应命名为mRuby.y(如果您在Bison中使用C++,我建议使用.ypp这样的C++后缀,它会生成.cpp和.hpp文件)。
使用以下命令生成并编译文件:
# Generate the scanner source from the flex specification.
flex mRuby.l
# Generate the parser from the grammar; the scanner includes "mRuby.tab.h",
# which this step is expected to produce alongside mRuby.tab.c.
bison -d mRuby.y
# Compile both generated sources with the C++ driver and link them into one
# executable named parser.
g++ mRuby.tab.c lex.yy.c -o parser
似乎可以正常工作,不过由于缺少定义相应类型的头文件,我很难实际验证。请注意,flex和bison生成的文件都是作为C++编译的,这种方式对flex和bison都适用。
对于问题中给出的这个示例,Bison的输出报告了18个移进/归约(shift/reduce)冲突。