我使用 Jison 编写了一个解析器,它能够处理支持操作符和布尔运算的类 Google 搜索查询。目前,我很难弄清楚如何允许 AND、OR 和 NOT 操作符两侧出现空白。任何帮助都将非常感激,我在下面附上了一些期望输入/输出的例子。
输入:
- true && false || true
- (true) && (false || true)
- true&&false||true
期望输出(输入 1-3):([true]&&([false]||[true]))
代码:
%lex
%%
/* Lexical Grammar */
/* Tokenizer for the search-query language. In this version whitespace is
   reported to the parser as an explicit SPACE token.
   Fixes: the regex escapes had been lost (`s+` / `w+` matched the literal
   letters "s" and "w"), and the double-quote literal was unescaped. */
"AND"|"&&" { return "AND" }
"OR"|"||" { return "OR" }
"NOT"|"!" { return "NOT" }
"(" { return "OPEN" }
")" { return "CLOSE" }
":" { return "QUAL" }
"-" { return "DASH" }
"\""|"'" { return "QUOTE" }
\s+ { return "SPACE" }
\w+ { return "WORD" }
"." { return "DOT" }
<<EOF>> { return "EOF" }
. { return "INVALID" }
/lex
/* Operators */
/* Associativity/precedence declarations. Following the yacc/Bison convention
   that Jison adopts, tokens on the same line share one precedence level and
   later lines bind tighter: AND/OR (lowest), then NOT, then QUAL/DASH/DOT. */
%right AND OR
%right NOT
%right QUAL DASH DOT
/* Parsing starts at the START nonterminal. */
%start START
%%
/* Language Grammar */
/* Entry rule: a complete query is one EXP followed by end of input. The EXP's
   value is returned as the result of the parse. */
START
: EXP EOF
{ return $1; }
;
/* Boolean expression. Each alternative builds a string:
     EXP AND EXP    -> "(" left "&&" right ")"
     EXP OR EXP     -> "(" left "||" right ")"
     NOT EXP        -> "(!" operand ")"
     OPEN EXP CLOSE -> the inner expression's value, unchanged
     ARGS           -> the ARGS value wrapped in "[" ... "]" (the array is
                       coerced to a string by the concatenation). */
EXP
: EXP AND EXP
{ $$ = "(" + $1 + "&&" + $3 + ")"; }
| EXP OR EXP
{ $$ = "(" + $1 + "||" + $3 + ")"; }
| NOT EXP
{ $$ = "(!" + $2 + ")"; }
| OPEN EXP CLOSE
{ $$ = $2; }
| ARGS
{ $$ = "[" + $1 + "]"; }
;
/* One or more ARG / OP items separated by SPACE tokens, collected into a flat
   array (the head item is prepended to the array built for the tail). */
ARGS
: ARG SPACE ARGS
{ $$ = [ $1 ].concat($3); }
| OP SPACE ARGS
{ $$ = [ $1 ].concat($3); }
| ARG
{ $$ = [ $1 ]; }
| OP
{ $$ = [ $1 ]; }
;
/* Qualifier expression: "ARG QUAL ARG" is rendered as left + ":" + right.
   A leading DASH prefixes the rendered OP with "-". */
OP
: DASH OP
{ $$ = "-" + $2; }
| ARG QUAL ARG
{ $$ = $1 + ":" + $3; }
;
/* A single argument: a DASH-prefixed ARG (rendered with a leading "-"), a
   quoted TERMS sequence (joined with single spaces), or a bare TERM. */
ARG
: DASH ARG
{ $$ = "-" + $2; }
| QUOTE TERMS QUOTE
{ $$ = $2.join(" "); }
| TERM
{ $$ = $1; }
;
/* One or more TERMs separated by SPACE tokens, collected into a flat array. */
TERMS
: TERM SPACE TERMS
{ $$ = [ $1 ].concat($3); }
| TERM
{ $$ = [ $1 ]; }
;
/* A WORD, or TERMs joined by a DASH or DOT token; the joined form is the
   concatenation of the three semantic values (left, separator, right). */
TERM
: TERM DASH TERM
{ $$ = $1 + $2 + $3; }
| TERM DOT TERM
{ $$ = $1 + $2 + $3; }
| WORD
{ $$ = $1; }
;
明白了。我开始忽略空白,改变了一些规则,并解决了冲突。解析器返回一个函数,该函数用于确定某个对象是否与查询匹配。下面是最终结果:
/* Google-Like Parser */
/* Lexical Grammar */
%lex
%%
/* Tokenizer rules. Whitespace is discarded here, so operators and arguments
   may be separated by any amount of spacing.
   Fixes: the regex escapes had been lost (`s+` / `w+` matched the literal
   letters "s" and "w"), and the double-quote literal was unescaped. */
\s+ { /* ignore whitespace */ }
"AND"|"&&" { return "AND" }
"OR"|"||" { return "OR" }
"NOT"|"!" { return "NOT" }
"(" { return "OPEN" }
")" { return "CLOSE" }
":" { return "QUAL" }
"-" { return "NEG" }
"\""|"'" { return "QUOTE" }
\w+ { return "WORD" }
"." { return "DOT" }
<<EOF>> { return "EOF" }
. { return "INVALID" }
/lex
/* Operators */
/* Associativity/precedence declarations. Following the yacc/Bison convention
   that Jison adopts, tokens on the same line share one precedence level and
   later lines bind tighter: AND/OR (lowest), then NOT, then QUAL/NEG/DOT. */
%right AND OR
%right NOT
%right QUAL NEG DOT
/* Parsing starts at the START nonterminal. */
%start START
%%
/* Language Grammar */
/* Entry rule: a complete query is one EXP followed by end of input. The EXP's
   value (a predicate function, see EXP) is returned as the parse result. */
START
: EXP EOF
{ return $1; }
;
/* Boolean expression. Every alternative yields a function(obj) predicate:
     EXP AND EXP    -> both operand predicates combined with &&
     EXP OR EXP     -> both operand predicates combined with ||
     NOT EXP        -> negation of the operand predicate
     OPEN EXP CLOSE -> the inner predicate, unchanged
     ARGS           -> predicate that calls parser.processArgs on the argument
                       list and applies the resulting function to obj. */
EXP
: EXP AND EXP
{ $$ = function(obj) { return ($1(obj) && $3(obj)); }; }
| EXP OR EXP
{ $$ = function(obj) { return ($1(obj) || $3(obj)); }; }
| NOT EXP
{ $$ = function(obj) { return !($2(obj)); }; }
| OPEN EXP CLOSE
{ $$ = $2; }
| ARGS
{ $$ = function(obj) { return parser.processArgs(obj, $1)(obj); }; }
;
/* One or more adjacent ARG / OP items. The value is a nested list: [item] for
   the last item, otherwise [item, rest] where rest is the ARGS value of the
   remaining items. parser.processArgs walks this shape via args[0]/args[1]. */
ARGS
: ARG ARGS
{ $$ = [ $1, $2]; }
| OP ARGS
{ $$ = [ $1, $2]; }
| ARG
{ $$ = [ $1 ]; }
| OP
{ $$ = [ $1 ]; }
;
/* Negated and/or qualified argument. Values are objects with the keys
   "not", "operator" and "operand":
     NEG ARG          -> the ARG object itself, with its `not` flag set to true
     NEG ARG QUAL ARG -> not: true,  operator = first ARG's operand,
                                     operand  = second ARG's operand
     ARG QUAL ARG     -> not: false, same operator/operand mapping. */
OP
: NEG ARG
{{
$2.not = true;
$$ = $2;
}}
| NEG ARG QUAL ARG
{{
$$ = {
"not": true,
"operator": $2.operand,
"operand": $4.operand
};
}}
| ARG QUAL ARG
{{
$$ = {
"not": false,
"operator": $1.operand,
"operand": $3.operand
};
}}
;
/* Plain argument: either a quoted TERMS sequence (its terms joined with single
   spaces) or a bare TERM. The value is an object with not: false,
   operator: null and the text in `operand`. */
ARG
: QUOTE TERMS QUOTE
{{
$$ = {
"not": false,
"operator": null,
"operand": $2.join(" ")
};
}}
| TERM
{{
$$ = {
"not": false,
"operator": null,
"operand": $1
};
}}
;
/* One or more adjacent TERMs, collected into a flat array. */
TERMS
: TERM TERMS
{ $$ = [ $1 ].concat($2); }
| TERM
{ $$ = [ $1 ]; }
;
/* A WORD, optionally followed by DOT and another TERM; the dotted form is the
   concatenation of the three semantic values (word, dot, rest). */
TERM
: WORD DOT TERM
{ $$ = $1 + $2 + $3; }
| WORD
{ $$ = $1; }
;
%%
/**
 * Build a predicate from the nested argument list produced by the ARGS rule.
 *
 * `args` has the shape `[item]` or `[item, rest]`, where `rest` is another
 * list of the same shape. An item with a truthy `operator` is AND-ed with the
 * predicate for the rest of the list; an item without one is OR-ed with it.
 * Individual items are tested through `parser.matchArg`.
 *
 * @param {*} obj  Not used while building (the returned predicate receives
 *                 its own `obj`); kept so existing callers keep working.
 * @param {Array} args  Nested argument list, `[item]` or `[item, rest]`.
 * @returns {function(*): *} Predicate to apply to a candidate object.
 */
parser.processArgs = function(obj, args) {
    var head = args[0];
    if (args.length > 1) {
        // Fix: the recursive call used to omit `obj`, so the remaining list
        // landed in the `obj` slot, `args` was undefined and `args.length`
        // threw a TypeError for any list longer than one item.
        var matchRest = parser.processArgs(obj, args[1]);
        if (head.operator) {
            return function(obj) { return (parser.matchArg(obj, head) && matchRest(obj)); };
        }
        return function(obj) { return (parser.matchArg(obj, head) || matchRest(obj)); };
    }
    return function(obj) { return parser.matchArg(obj, head); };
};
/**
 * Default matcher hook, meant to be overridden later by the code using the
 * parser. This placeholder accepts every object/argument pair.
 *
 * @param {*} obj  Candidate object being tested.
 * @param {*} arg  Argument descriptor built by the grammar.
 * @returns {boolean} Always `true` in this default implementation.
 */
parser.matchArg = function(obj, arg) {
    return true;
};