用 Jison 实现类似 Google 的搜索查询解析器



我使用 Jison 编写了一个解析器，它能够处理类似 Google 的搜索查询，并支持运算符和布尔运算。目前，我不知道如何让 AND、OR 和 NOT 运算符两侧可以出现空白。任何帮助我都将非常感激；下面附上了一些期望的输入/输出示例。

输入:

  1. true AND false || true
  2. (true) &&(false || true)
  3. true&&false||true
结果:

1–3. ([true]&&([false]||[true]))

代码:

%lex
%%
/* Lexical Grammar */
/*
 * Token rules for the search-query language.
 * Restored the regex escapes that had been stripped from this listing:
 * whitespace is \s+, words are \w+, and the double-quote pattern is "\"".
 * Without the backslashes, s+ and w+ match only the literal letters
 * "s" and "w", and the quote pattern is not a valid string literal.
 */
/* Boolean operators: each accepts a keyword or a symbolic spelling. */
"AND"|"&&"          { return "AND" }
"OR"|"||"           { return "OR" }
"NOT"|"!"           { return "NOT" }
/* Grouping, qualifier separator and dash. */
"("                 { return "OPEN" }
")"                 { return "CLOSE" }
":"                 { return "QUAL" }
"-"                 { return "DASH" }
/* Both quote characters produce the same QUOTE token. */
"\""|"'"            { return "QUOTE" }
/* Whitespace is a real token in this version: the grammar uses SPACE as a separator. */
\s+                 { return "SPACE" }
\w+                 { return "WORD" }
"."                 { return "DOT" }
<<EOF>>             { return "EOF" }
/* Catch-all: any other single character is reported as INVALID. */
.                   { return "INVALID" }
/lex
/* Operators */
/*
 * Associativity declarations: every listed token is right-associative.
 * In yacc-style declarations, tokens on the same line share one precedence
 * level and later lines bind tighter, so AND and OR have equal (lowest)
 * precedence here.
 */
%right AND OR
%right NOT
%right QUAL DASH DOT
%start START
%%
/* Language Grammar */
/* Entry point: a whole expression followed by end of input; returns the EXP value. */
START
    : EXP EOF
        { return $1; }
    ;
/*
 * Boolean expression. Each alternative builds a string:
 * AND/OR wrap both sides in parentheses joined by "&&" / "||",
 * NOT prefixes "!", parentheses pass the inner value through,
 * and an ARGS list is wrapped in "[" ... "]" (the array is coerced
 * to a string by the concatenation).
 * No alternative here consumes a SPACE token around AND/OR/NOT.
 */
EXP
    : EXP AND EXP
        { $$ = "(" + $1 + "&&" + $3 + ")"; }
    | EXP OR EXP
        { $$ = "(" + $1 + "||" + $3 + ")"; }
    | NOT EXP
        { $$ = "(!" + $2 + ")"; }
    | OPEN EXP CLOSE
        { $$ = $2; }
    | ARGS
        { $$ = "[" + $1 + "]"; }
    ;
/*
 * One or more ARG / OP items separated by a SPACE token.
 * The value is a flat array of the item values, in input order.
 */
ARGS
    : ARG SPACE ARGS
        { $$ = [ $1 ].concat($3); }
    | OP SPACE ARGS
        { $$ = [ $1 ].concat($3); }
    | ARG
        { $$ = [ $1 ]; }
    | OP
        { $$ = [ $1 ]; }
    ;
/*
 * Qualified argument: "left:right", optionally prefixed by one or more dashes.
 * The value is the reassembled string.
 */
OP
    : DASH OP
        { $$ = "-" + $2; }
    | ARG QUAL ARG
        { $$ = $1 + ":" + $3; }
    ;
/*
 * Single argument: a dash-prefixed argument, a quoted phrase
 * (its terms joined with one space), or a bare term.
 */
ARG
    : DASH ARG
        { $$ = "-" + $2; }
    | QUOTE TERMS QUOTE
        { $$ = $2.join(" "); }
    | TERM
        { $$ = $1; }
    ;
/* SPACE-separated terms inside a quoted phrase; the value is an array of term strings. */
TERMS
    : TERM SPACE TERMS
        { $$ = [ $1 ].concat($3); }
    | TERM
        { $$ = [ $1 ]; }
    ;
/*
 * A word, or terms joined by a dash or dot; the separator token's text
 * ($2) is kept in the resulting string.
 */
TERM
    : TERM DASH TERM
        { $$ = $1 + $2 + $3; }
    | TERM DOT TERM
        { $$ = $1 + $2 + $3; }
    | WORD
        { $$ = $1; }
    ;

问题已经解决了。我改为在词法分析阶段直接忽略空白，调整了一些规则，并解决了语法冲突。现在解析器返回一个函数，用于判断某个对象是否与查询匹配。下面是最终结果：

/* Google-Like Parser */
/* Lexical Grammar */
/*
 * Restored the regex escapes that had been stripped from this listing:
 * whitespace is \s+, words are \w+, and the double-quote pattern is "\"".
 * Without the backslashes, s+ and w+ match only the literal letters
 * "s" and "w", and the quote pattern is not a valid string literal.
 */
%lex
%%
\s+                   { /* ignore whitespace */ }
"AND"|"&&"            { return "AND" }
"OR"|"||"             { return "OR" }
"NOT"|"!"             { return "NOT" }
"("                   { return "OPEN" }
")"                   { return "CLOSE" }
":"                   { return "QUAL" }
"-"                   { return "NEG" }
"\""|"'"              { return "QUOTE" }
\w+                   { return "WORD" }
"."                   { return "DOT" }
<<EOF>>               { return "EOF" }
.                     { return "INVALID" }
/lex
/* Operators */
/*
 * Associativity declarations: every listed token is right-associative.
 * In yacc-style declarations, tokens on the same line share one precedence
 * level and later lines bind tighter, so AND and OR have equal (lowest)
 * precedence here.
 */
%right AND OR
%right NOT
%right QUAL NEG DOT
%start START
%%
/* Language Grammar */
/* Entry point: a whole expression followed by end of input; returns the EXP value. */
START
    : EXP EOF
        { return $1; }
    ;
/*
 * Boolean expression. Every alternative yields a function taking one
 * object: AND/OR combine the two sub-functions with && / ||, NOT negates,
 * parentheses pass the inner function through, and an ARGS list is
 * delegated to parser.processArgs (called each time the function runs).
 */
EXP
    : EXP AND EXP
        { $$ = function(obj) { return ($1(obj) && $3(obj)); }; }
    | EXP OR EXP
        { $$ = function(obj) { return ($1(obj) || $3(obj)); }; }
    | NOT EXP
        { $$ = function(obj) { return !($2(obj)); }; }
    | OPEN EXP CLOSE
        { $$ = $2; }
    | ARGS
        { $$ = function(obj) { return parser.processArgs(obj, $1)(obj); }; }
    ;
/*
 * One or more adjacent ARG / OP items (no separator token).
 * The value is nested rather than flat: [item] for the last item,
 * otherwise the pair [item, rest] where rest is the value of the
 * remaining ARGS.
 */
ARGS
    : ARG ARGS
        { $$ = [ $1, $2]; }
    | OP ARGS
        { $$ = [ $1, $2]; }
    | ARG
        { $$ = [ $1 ]; }
    | OP
        { $$ = [ $1 ]; }
    ;
/*
 * Negated and/or qualified argument.
 * NEG ARG reuses the ARG object and sets its "not" flag.
 * For "left:right" forms a new object is built whose "operator" is the
 * left argument's operand text and whose "operand" is the right one's.
 */
OP
    : NEG ARG
        {{
            $2.not = true;
            $$ = $2;
        }}
    | NEG ARG QUAL ARG
        {{
            $$ = {
                "not": true,
                "operator": $2.operand,
                "operand": $4.operand
            };
        }}
    | ARG QUAL ARG
        {{
            $$ = {
                "not": false,
                "operator": $1.operand,
                "operand": $3.operand
            };
        }}
    ;
/*
 * Plain argument object { not, operator, operand } with not = false and
 * operator = null. The operand is either a quoted phrase (its terms
 * joined with one space) or a bare term.
 */
ARG
    : QUOTE TERMS QUOTE
        {{
            $$ = {
                "not": false,
                "operator": null,
                "operand": $2.join(" ")
            };
        }}
    | TERM
        {{
            $$ = {
                "not": false,
                "operator": null,
                "operand": $1
            };
        }}
    ;
/* Adjacent terms inside a quoted phrase; the value is a flat array of term strings. */
TERMS
    : TERM TERMS
        { $$ = [ $1 ].concat($2); }
    | TERM
        { $$ = [ $1 ]; }
    ;
/* A word, or words chained with dots; the dot text ($2) is kept in the string. */
TERM
    : WORD DOT TERM
        { $$ = $1 + $2 + $3; }
    | WORD
        { $$ = $1; }
    ;
%%
/**
 * Build a matcher function for a parsed ARGS value.
 *
 * `args` has the nested shape produced by the ARGS grammar rule: either
 * `[arg]` (last item) or `[arg, rest]`, where `rest` has the same shape.
 * When the first argument has a truthy `operator`, its match result is
 * combined with the rest using `&&`; otherwise using `||`.
 *
 * @param {*} obj - Only forwarded to the recursive call; the returned
 *     matcher takes the object to test as its own parameter.
 * @param {Array} args - Nested argument list as described above.
 * @returns {function(*): *} Matcher that returns the combined result of
 *     `parser.matchArg` for each argument.
 */
parser.processArgs = function(obj, args) {
    var head = args[0];

    if (args.length > 1)
    {
        // Fix: the recursive call used to be parser.processArgs(args[1]),
        // which shifted the list into the `obj` slot and left `args`
        // undefined, so `args.length` threw as soon as the rest of the
        // list had to be evaluated. Pass both parameters.
        var matchRest = parser.processArgs(obj, args[1]);

        if (head.operator)
            return function(obj) { return (parser.matchArg(obj, head) && matchRest(obj)); };
        else
            return function(obj) { return (parser.matchArg(obj, head) || matchRest(obj)); };
    }
    else
    {
        return function(obj) { return parser.matchArg(obj, head); };
    }
};
/*
 * Placeholder matcher: accepts every (object, argument) pair.
 * Intended to be replaced later with real matching logic.
 */
parser.matchArg = function(obj, arg) {
    var isMatch = true;
    return isMatch;
};

相关内容

  • 没有找到相关文章

最新更新