在Python 2.7中解析布尔表达式中的复杂操作数



我正在尝试修改pyparsing中的示例代码,以处理键值对操作数,如:

(Region:US and Region:EU) or (Region:Asia)

这是一个包含三个操作数的布尔表达式——Region:US、Region:EU和Region:Asia。如果它们是像x、y和z这样的简单操作数,那就没有问题了。我不需要对它们做任何特殊处理来拆分键值对,只需要把每个操作数整体当作一个简单操作数(就像x那样),为其分配真值,然后计算整个表达式。

我如何修改以下代码来处理此问题:

#
# simpleBool.py
#
# Example of defining a boolean logic parser using
# the infixNotation helper method in pyparsing.
#
# In this example, parse actions associated with each
# operator expression will "compile" the expression
# into BoolXXX class instances, which can then
# later be evaluated for their boolean value.
#
# Copyright 2006, by Paul McGuire
# Updated 2013-Sep-14 - improved Python 2/3 cross-compatibility
#
from pyparsing import infixNotation, opAssoc, Keyword, Word, alphas
# define classes to be built at parse time, as each matching
# expression type is parsed
class BoolOperand(object):
    """Leaf of the expression tree: one operand label plus its truth value.

    ``t`` is the token sequence handed over at parse time; ``t[0]`` is the
    operand text, which is kept as the label and evaluated once to obtain
    the value returned by truth testing.
    """

    def __init__(self, t):
        self.label = t[0]
        # NOTE(review): eval() resolves the operand text as a Python
        # expression; this is unsafe if the expression text is untrusted.
        self.value = eval(self.label)

    def __str__(self):
        return self.label

    def __bool__(self):
        return self.value

    # Python 2 looks up __nonzero__ for truth testing; repr mirrors str.
    __nonzero__ = __bool__
    __repr__ = __str__
class BoolBinOp(object):
    """Base class for boolean operators applied to a run of operands.

    Subclasses provide ``reprsymbol`` (the text placed between operands
    when printing) and ``evalop`` (a callable reducing an iterable of
    bools to a single result).
    """

    def __init__(self, t):
        # t[0] alternates operand, operator, operand, ...; every second
        # item starting at index 0 is an operand.
        group = t[0]
        self.args = group[0::2]

    def __bool__(self):
        truth_values = (bool(operand) for operand in self.args)
        return self.evalop(truth_values)

    def __str__(self):
        joiner = " %s " % self.reprsymbol
        rendered = joiner.join(str(operand) for operand in self.args)
        return "(" + rendered + ")"

    # Python 2 looks up __nonzero__ for truth testing; repr mirrors str.
    __nonzero__ = __bool__
    __repr__ = __str__
class BoolAnd(BoolBinOp):
    """AND node: reduces its operands with all(); printed with '&'."""
    evalop = all
    reprsymbol = '&'
class BoolOr(BoolBinOp):
    """OR node: reduces its operands with any(); printed with '|'."""
    evalop = any
    reprsymbol = '|'
class BoolNot(object):
    """Unary negation node wrapping a single operand."""

    def __init__(self, t):
        # t[0] holds the operator followed by its operand; keep the operand.
        group = t[0]
        self.arg = group[1]

    def __str__(self):
        return "".join(("~", str(self.arg)))

    def __bool__(self):
        return not bool(self.arg)

    # Python 2 looks up __nonzero__ for truth testing; repr mirrors str.
    __nonzero__ = __bool__
    __repr__ = __str__
# Keyword expressions for the two boolean literals.
TRUE = Keyword("True")
FALSE = Keyword("False")
# An operand is a boolean literal or a one-character alphabetic name (max=1).
boolOperand = TRUE | FALSE | Word(alphas,max=1)
# Every matched operand is wrapped in a BoolOperand instance at parse time.
boolOperand.setParseAction(BoolOperand)
# define expression, based on expression operand and
# list of operations in precedence order
# Each entry: (operator text, operand count, associativity, node class).
boolExpr = infixNotation( boolOperand,
    [
    ("not", 1, opAssoc.RIGHT, BoolNot),
    ("and", 2, opAssoc.LEFT,  BoolAnd),
    ("or",  2, opAssoc.LEFT,  BoolOr),
    ])

if __name__ == "__main__":
    # Truth values of the one-letter operands. BoolOperand resolves each
    # operand name with eval(), so these are defined at module level.
    p = True
    q = False
    r = True
    # Each entry is (expression text, expected boolean result).
    tests = [("p", True),
             ("q", False),
             ("p and q", False),
             ("p and not q", True),
             ("not not p", True),
             ("not(p and q)", True),
             ("q or not p and r", False),
             ("q or not p or not r", False),
             ("q or not (p and r)", False),
             ("p or q or r", True),
             ("p or q or r and False", True),
             ("(p or q or r) and False", False),
            ]
    print("p =", p)
    print("q =", q)
    print("r =", r)
    print()
    for t,expected in tests:
        # Element 0 of the parse result is the root node of the expression.
        res = boolExpr.parseString(t)[0]
        success = "PASS" if bool(res) == expected else "FAIL"
        # Newline separators put the input, the evaluation and the verdict
        # on separate lines.
        print (t,'\n', res, '=', bool(res),'\n', success, '\n')

我想用"Region:US"、"Region:EU"和"Region:Asia"来代替p、q、r。有什么想法吗?

EDIT:根据Paul McGuire的建议,我尝试编写了以下代码,但它在解析时出错:

#
# simpleBool.py
#
# Example of defining a boolean logic parser using
# the infixNotation helper method in pyparsing.
#
# In this example, parse actions associated with each
# operator expression will "compile" the expression
# into BoolXXX class instances, which can then
# later be evaluated for their boolean value.
#
# Copyright 2006, by Paul McGuire
# Updated 2013-Sep-14 - improved Python 2/3 cross-compatibility
#
from pyparsing import infixNotation, opAssoc, Keyword, Word, alphas
# define classes to be built at parse time, as each matching
# expression type is parsed
class BoolOperand(object):
    """Leaf of the expression tree: one operand label plus its truth value.

    ``t`` is the token sequence handed over at parse time; ``t[0]`` is the
    operand text. The literals ``True`` and ``False`` evaluate to
    themselves; any other label is looked up in the module-level
    ``validValues`` dict.

    Raises:
        KeyError: if the label is neither a boolean literal nor a key of
            ``validValues``.
    """

    # The operand grammar also matches the True/False keywords and sends
    # them through this class, so they must resolve without needing a
    # validValues entry.
    _LITERALS = {"True": True, "False": False}

    def __init__(self,t):
        self.label = t[0]
        if self.label in self._LITERALS:
            self.value = self._LITERALS[self.label]
        else:
            # validValues is a module-level name supplied by the script.
            self.value = validValues[self.label]
    def __bool__(self):
        return self.value
    def __str__(self):
        return self.label
    # Python 2 looks up __nonzero__ for truth testing; repr mirrors str.
    __repr__ = __str__
    __nonzero__ = __bool__
class BoolBinOp(object):
    """Common behaviour of boolean operators joining several operands.

    A subclass sets ``reprsymbol`` (separator text used by ``str``) and
    ``evalop`` (a callable that folds an iterable of bools into one value).
    """

    def __init__(self, t):
        # The token group alternates operand, operator, operand, ...;
        # the even positions are the operands.
        tokens = t[0]
        self.args = tokens[0::2]

    def __bool__(self):
        as_bools = (bool(item) for item in self.args)
        return self.evalop(as_bools)

    def __str__(self):
        separator = " %s " % self.reprsymbol
        inner = separator.join(str(item) for item in self.args)
        return "(" + inner + ")"

    # Python 2 looks up __nonzero__ for truth testing; repr mirrors str.
    __nonzero__ = __bool__
    __repr__ = __str__
class BoolAnd(BoolBinOp):
    """AND node: reduces its operands with all(); printed with '&'."""
    evalop = all
    reprsymbol = '&'
class BoolOr(BoolBinOp):
    """OR node: reduces its operands with any(); printed with '|'."""
    evalop = any
    reprsymbol = '|'
class BoolNot(object):
    """Negation node: holds one operand and inverts its truth value."""

    def __init__(self, t):
        # The token group is the operator followed by its operand.
        tokens = t[0]
        self.arg = tokens[1]

    def __str__(self):
        return "".join(("~", str(self.arg)))

    def __bool__(self):
        return not bool(self.arg)

    # Python 2 looks up __nonzero__ for truth testing; repr mirrors str.
    __nonzero__ = __bool__
    __repr__ = __str__
# Keyword expressions for the two boolean literals.
TRUE = Keyword("True")
FALSE = Keyword("False")
# NOTE(review): max=1 still limits the Word to a single character, so a
# multi-character operand such as Region:US cannot match here; this looks
# like the cause of the parse failure this listing is reported to have.
boolOperand = TRUE | FALSE | Word(alphas+":",max=1)
# Every matched operand is wrapped in a BoolOperand instance at parse time.
boolOperand.setParseAction(BoolOperand)
# define expression, based on expression operand and
# list of operations in precedence order
# Each entry: (operator text, operand count, associativity, node class).
boolExpr = infixNotation( boolOperand,
    [
    ("not", 1, opAssoc.RIGHT, BoolNot),
    ("and", 2, opAssoc.LEFT,  BoolAnd),
    ("or",  2, opAssoc.LEFT,  BoolOr),
    ])

if __name__ == "__main__":
    # Truth value of every key:value operand; BoolOperand looks operands
    # up in this module-level dict. The values are chosen so that the
    # expected results listed in `tests` hold (Region:US true, Region:EU
    # false, Type:Global Assets>24 true).
    validValues = {
        "Region:US": True,
        "Region:EU": False,
        "Type:Global Assets>24": True
    }
    # Each entry is (expression text, expected boolean result).
    tests = [("Region:US", True),
             ("Region:EU", False),
             ("Region:US and Region:EU", False),
             ("Region:US and not Region:EU", True),
             ("not not Region:US", True),
             ("not(Region:US and Region:EU)", True),
             ("Region:EU or not Region:US and Type:Global Assets>24", False),
             ("Region:EU or not Region:US or not Type:Global Assets>24", False),
             ("Region:EU or not (Region:US and Type:Global Assets>24)", False),
             ("Region:US or Region:EU or Type:Global Assets>24", True),
             ("Region:US or Region:EU or Type:Global Assets>24 and False", True),
             ("(Region:US or Region:EU or Type:Global Assets>24) and False", False),
            ]
    print("Region:US =", validValues["Region:US"])
    print("Region:EU =", validValues["Region:EU"])
    print("Type:Global Assets>24 =", validValues["Type:Global Assets>24"])
    print()
    for t,expected in tests:
        # Element 0 of the parse result is the root node of the expression.
        res = boolExpr.parseString(t)[0]
        success = "PASS" if bool(res) == expected else "FAIL"
        # Newline separators put the input, the evaluation and the verdict
        # on separate lines.
        print (t,'\n', res, '=', bool(res),'\n', success, '\n')

感谢Paul McGuire的帮助,以下是解决方案:

# Operand alternatives: True, False, a key:"quoted value" pair wrapped in
# Combine, or a bare word made of letters, ':', '<' and '>'.
# NOTE(review): Combine and quotedString are not in the import line of the
# listing; presumably they are imported from pyparsing as well.
boolOperand = TRUE | FALSE | Combine(Word(alphas)+":"+quotedString) | Word(alphas+":<>")

这按照我的意愿进行解析。

进行此更改有两部分:更改解析器,然后更改解析后的行为以适应这些新值。

要解析不仅仅是简单的1字符名称的操作数,请在解析器中更改以下行:

# Original definition: variable operands are single alphabetic characters.
boolOperand = TRUE | FALSE | Word(alphas,max=1)

最简单(但不是最严格)的方法是将其更改为:

# Relaxed definition: a word of letters and ':' with no max=1 limit.
boolOperand = TRUE | FALSE | Word(alphas+":")

但是,除了您的有效值"Region:US"或"TimeZone:UTC"之外,这种写法还会接受可能无效的值,如"XouEWRL:sdlkfj"、":sldjf:ljsdf:sdljf",甚至"::"。如果您想让解析器更严格,可以强制键只能取特定的值:

# Keys are restricted to this fixed list of names.
# NOTE(review): oneOf and Combine are not in the import line of the
# listing; presumably they are imported from pyparsing as well.
valid_key = oneOf("Region Country City State ZIP")
# Values are words made of letters and underscores.
valid_value = Word(alphas+"_")
# key, ':' and value wrapped in Combine to form the key:value operand.
valid_kv = Combine(valid_key + ":" + valid_value)
# An operand is a boolean literal or one key:value pair.
boolOperand = TRUE | FALSE | valid_kv

这样解析器部分就处理好了。

其次,在解析完成后,您需要更改对该操作数的求值方式。在我的示例中,我强调的是解析部分,而不是求值部分,所以我只是简单地调用了内置函数eval()。在您的情况下,您可能需要用每个可接受的键值对初始化一个保存其真值的dict,然后修改BoolOperand中的代码,改为在dict中查找,而不是调用eval。(这样做还有一个额外的好处:不必对用户输入的数据调用eval(),而那样做存在各种潜在的安全问题。)

最新更新