给定如下数据类:
class MessageHeader(BaseModel):
    """Message header holding a single UUID identifier.

    NOTE(review): ``BaseModel`` is not defined in this snippet; presumably it
    is pydantic's ``BaseModel``, which would supply ``self.json()`` -- confirm.
    """

    message_id: uuid.UUID

    def dict(self, **kwargs):
        """Return the header as a dict by round-tripping through JSON.

        The instance is serialised with ``self.json()`` and the resulting
        text is parsed back with ``json.loads``. ``**kwargs`` is accepted
        but ignored.
        """
        return json.loads(self.json())
当我在 `MessageHeader` 上调用 `dict` 时,希望得到一个值均为字符串字面量的字典,期望结果如下所示:
{'message_id': '383b0bfc-743e-4738-8361-27e6a0753b5a'}
我想避免使用 pydantic 之类的第三方库,也不想使用 `json.loads(self.json())`,因为这会带来一次额外的序列化/反序列化往返。
是否有更好的方法,把数据类转换成上面那种值为字符串字面量的字典?
您可以使用 `dataclasses.asdict`:
from dataclasses import dataclass, asdict
class MessageHeader(BaseModel):
    """Message header whose ``dict()`` stringifies every field value.

    NOTE(review): ``dataclasses.asdict`` only accepts dataclass instances,
    so this presumably expects the class to be a dataclass -- confirm what
    ``BaseModel`` is, since it is not defined in this snippet.
    """

    message_id: uuid.UUID

    def dict(self):
        """Return ``{field name: str(value)}`` for every field of the instance."""
        return {k: str(v) for k, v in asdict(self).items()}
如果您确定您的类只有字符串值,则可以完全跳过字典推导:
class MessageHeader(BaseModel):
    """Message header that exposes ``dataclasses.asdict`` directly as ``dict()``.

    NOTE(review): ``BaseModel`` is not defined in this snippet; ``asdict``
    only accepts dataclass instances -- confirm the class is a dataclass.
    """

    message_id: uuid.UUID

    # ``asdict`` is a plain function, so assigning it as a class attribute
    # makes ``instance.dict()`` equivalent to ``asdict(instance)``.
    dict = asdict
如果追求纯粹的速度和极致的效率——快到连查克·诺里斯这样的人都要停下脚步、满怀敬畏地注视——我谦卑地推荐下面这个经过精心设计、基于 `__dict__` 的方法:
def dict(self):
    """Return a shallow copy of ``self.__dict__`` with ``message_id`` as a string.

    The instance itself is left untouched because only the copy is modified.
    Raises ``KeyError`` if the instance has no ``message_id`` attribute.
    """
    _dict = self.__dict__.copy()
    _dict['message_id'] = str(_dict['message_id'])
    return _dict
对于定义了 `__slots__` 属性的类(例如使用 `@dataclass(slots=True)` 的类),上述方法很可能不起作用,因为类实例上没有 `__dict__` 属性。在这种情况下,下面这种高效的"登月计划"式方法可能是可行的:
def dict(self):
    """Return the slot values as a dict, with ``message_id`` converted to ``str``.

    On the first call this builds the source of a specialised ``dict``
    function from ``self.__slots__``, compiles it with ``exec``, and stores
    it on the instance's class so later calls go straight to the generated
    function.
    """
    entries = []
    for name in self.__slots__:
        # Only the UUID field needs converting; other values pass through.
        value_expr = f'str(self.{name})' if name == 'message_id' else f'self.{name}'
        entries.append(f"'{name}':{value_expr}")
    body_lines = ','.join(entries)
    # Compute the text of the entire function. The newline between the
    # signature and the body is required for the source to compile.
    txt = f'def dict(self):\n    return {{{body_lines}}}'
    ns = {}
    # Use a fresh globals dict so the generated function does not keep this
    # call's local variables (including ``self``) alive.
    exec(txt, {}, ns)
    _dict_fn = self.__class__.dict = ns['dict']
    return _dict_fn(self)
如果此刻有人已经激动得快要从座位上摔下来了(我知道,这真是不可思议的突破性成果)——我在下面用 `timeit` 模块加上了自己的计时结果,希望能让大家对性能方面的情况有更清楚的了解。
仅供参考:使用纯 `__dict__` 的方法必然比 `dataclasses.asdict()` 快得多。
注意:尽管 `__dict__` 在这种特定情况下表现更好,但 `dataclasses.asdict()` 可能更适合复合结构的字典,例如包含嵌套数据类的字典,或者值为 `dict`、`list` 等可变类型的字典。
from dataclasses import dataclass, asdict, field
from uuid import UUID, uuid4
class DictMixin:
    """Mixin class to add a `dict()` method on classes that define a __slots__ attribute

    The first ``dict()`` call generates a specialised function from the
    instance's ``__slots__`` and installs it on the instance's class, so
    subsequent calls bypass this mixin method entirely.
    """

    def dict(self):
        """Return the slot values as a dict, with ``message_id`` converted to ``str``."""
        entries = []
        for name in self.__slots__:
            # Only the UUID field needs converting; other values pass through.
            value_expr = f'str(self.{name})' if name == 'message_id' else f'self.{name}'
            entries.append(f"'{name}':{value_expr}")
        body_lines = ','.join(entries)
        # Compute the text of the entire function. The newline between the
        # signature and the body is required for the source to compile.
        txt = f'def dict(self):\n    return {{{body_lines}}}'
        ns = {}
        # Use a fresh globals dict so the generated function does not keep
        # this call's local variables (including ``self``) alive.
        exec(txt, {}, ns)
        _dict_fn = self.__class__.dict = ns['dict']
        return _dict_fn(self)
@dataclass
class MessageHeader:
    """Benchmark subject: a regular dataclass with three ``dict`` variants.

    All three methods return the same mapping, with ``message_id`` converted
    to ``str`` and every other value left as is; they differ only in how the
    mapping is built, which is what the timing comparison measures.
    """

    message_id: UUID = field(default_factory=uuid4)
    string: str = 'a string'
    integer: int = 1000
    floating: float = 1.0

    def dict1(self):
        """Copy ``__dict__`` and overwrite ``message_id`` with its string form."""
        _dict = self.__dict__.copy()
        _dict['message_id'] = str(_dict['message_id'])
        return _dict

    def dict2(self):
        """Build the result with a comprehension over ``__dict__``."""
        return {k: str(v) if k == 'message_id' else v
                for k, v in self.__dict__.items()}

    def dict3(self):
        """Build the result with a comprehension over ``dataclasses.asdict``."""
        return {k: str(v) if k == 'message_id' else v
                for k, v in asdict(self).items()}
@dataclass(slots=True)
class MessageHeaderWithSlots(DictMixin):
    """Benchmark subject: the same fields as a slotted dataclass.

    ``dict()`` is inherited from ``DictMixin``; ``dict2()`` is the
    ``dataclasses.asdict`` based variant it is timed against.
    """

    message_id: UUID = field(default_factory=uuid4)
    string: str = 'a string'
    integer: int = 1000
    floating: float = 1.0

    def dict2(self):
        """Build the result with a comprehension over ``dataclasses.asdict``."""
        return {k: str(v) if k == 'message_id' else v
                for k, v in asdict(self).items()}
if __name__ == '__main__':
    from timeit import timeit

    header = MessageHeader()
    header_with_slots = MessageHeaderWithSlots()

    # Number of calls timed for each variant.
    n = 10000

    # ``globals=globals()`` lets the timed statements see ``header`` and
    # ``header_with_slots`` defined above.
    print('dict1(): ', timeit('header.dict1()', number=n, globals=globals()))
    print('dict2(): ', timeit('header.dict2()', number=n, globals=globals()))
    print('dict3(): ', timeit('header.dict3()', number=n, globals=globals()))

    print('slots -> dict(): ', timeit('header_with_slots.dict()', number=n, globals=globals()))
    print('slots -> dict2(): ', timeit('header_with_slots.dict2()', number=n, globals=globals()))

    print()

    dict__ = header.dict1()
    print(dict__)

    asdict__ = header.dict3()
    print(asdict__)

    # Sanity checks: only ``message_id`` is stringified, and every variant
    # produces the same mapping.
    assert isinstance(dict__['message_id'], str)
    assert isinstance(dict__['integer'], int)

    assert header.dict1() == header.dict2() == header.dict3()
    assert header_with_slots.dict() == header_with_slots.dict2()
我的Mac M1笔记本电脑的测试结果:
dict1(): 0.005992999998852611
dict2(): 0.00800508284009993
dict3(): 0.07069579092785716
slots -> dict(): 0.00583599996753037
slots -> dict2(): 0.07395245810039341
{'message_id': 'b4e17ef9-1a58-4007-9cef-39158b094da2', 'string': 'a string', 'integer': 1000, 'floating': 1.0}
{'message_id': 'b4e17ef9-1a58-4007-9cef-39158b094da2', 'string': 'a string', 'integer': 1000, 'floating': 1.0}
注意:如需更"完整"的 `DictMixin` 实现(名为 `SerializableMixin`),请查看我另外添加的相关回答。
受 @rv.kvetch 的回答启发,我编写了这个装饰器,它会根据类定义动态地为 `asdict` 方法生成代码。它还支持子类化,也就是说子类会继承超类的属性。
装饰器:
import typing
def generate_dict_method(
    __source: typing.Literal["slots", "annotations"],
    __name: str,
    /,
    **custom_mappings: typing.Callable[[typing.Any], typing.Any]
):
    """Return a class decorator that adds a generated "to dict" method.

    Args:
        __source: Where attribute names are read from, for every class in
            the decorated class's MRO: ``"annotations"`` uses
            ``__annotations__``, ``"slots"`` uses ``__slots__``.
        __name: Name under which the generated method is set on the class.
        **custom_mappings: Per-attribute converters, keyed by attribute
            name. The generated method stores ``converter(self.<attr>)``
            instead of ``self.<attr>`` for those attributes.

    Returns:
        A decorator that sets the generated method on the class and returns
        the same class object.

    Raises:
        NotImplementedError: When the decorator is applied and ``__source``
            is neither ``"slots"`` nor ``"annotations"``.
        RuntimeError: When the decorator is applied and no attribute names
            are found.
    """

    def decorator(cls):
        if __source not in ("annotations", "slots"):
            raise NotImplementedError(__source)

        # An insertion-ordered dict is used as an ordered set, and the MRO is
        # walked base-first, so the generated keys have a deterministic
        # order: inherited attributes first, then the class's own.
        attributes = {}
        for klass in reversed(cls.__mro__):
            if __source == "annotations":
                attrs = getattr(klass, "__annotations__", None)
            else:
                attrs = getattr(klass, "__slots__", None)
                if isinstance(attrs, str):
                    # A bare string declares a single slot; do not iterate
                    # over its characters.
                    attrs = (attrs,)
            for attrname in attrs or ():
                attributes.setdefault(attrname, None)

        if not attributes:
            raise RuntimeError(
                f"Unable to generate `{__name}` method for `{cls.__qualname__}` class: "
                "no attributes found."
            )

        # Converters are exposed to the generated function as globals under
        # a name derived from the attribute they apply to.
        funclocals = {}
        mapping_to_funcname = {}
        for attrname, f in custom_mappings.items():
            funcname = f'__parse_{attrname}'
            funclocals[funcname] = f
            mapping_to_funcname[attrname] = funcname

        body_lines = ','.join([
            f'"{attrname}": ' + (f'self.{attrname}' if attrname not in custom_mappings
                                 else f'{mapping_to_funcname[attrname]}(self.{attrname})')
            for attrname in attributes
        ])
        # The newline between the signature and the body is required for
        # the generated source to compile.
        txt = f'def {__name}(self):\n    return {{{body_lines}}}'
        d = {}
        exec(txt, funclocals, d)
        setattr(cls, __name, d[__name])
        return cls

    return decorator
用法:
from dataclasses import dataclass
import json
@dataclass(slots=True, kw_only=True)
class TestBase:
i1: int
i2: int
# The decorator adds an ``asdict`` method; the ``d`` field is passed through
# the custom converter, every other field is emitted unchanged.
@generate_dict_method("annotations", "asdict", d=(lambda x: "FUNNY" + json.dumps(x) + "JSON"))
@dataclass(slots=True, kw_only=True)
class Test(TestBase):
    """Subclass of the usage example; adds four fields to those of ``TestBase``."""

    i: int
    b: bool
    s: str
    d: dict
# Every field is keyword-only, so the instance is built with keywords; the
# generated ``asdict`` method is then called and its result printed.
a = Test(i1=2, i2=3, i=1, b=True, s="test", d={"test": "test"})
print(a.asdict())
输出:{'d': 'FUNNY{"test": "test"}JSON', 'i': 1, 'i1': 2, 'b': True, 's': 'test', 'i2': 3}
可以看到,您只需要通过 `**custom_mappings` 参数,以属性名作为关键字传入一个自定义解析函数,就可以按任何您认为合适的方式转换该属性。
在您的例子中,可以为 `message_id` 属性传入 `str` 函数。
这个页面是在 Google 搜索"dataclass to dict"时会出现的结果,而上面的答案都过于复杂了。您要找的可能是这个:
from dataclasses import dataclass
@dataclass
class MessageHeader:
    """Minimal dataclass whose only field is already a string."""

    uuid: str = "abcd"


# ``vars(obj)`` returns the instance's ``__dict__``, i.e. the field mapping.
vars(MessageHeader())  # or MessageHeader().__dict__