如何将Python数据类转换为字符串字面量的字典?



给定如下数据类:

class MessageHeader(BaseModel):
    """Message header model whose ``dict()`` returns JSON-compatible values.

    ``BaseModel`` is not defined in this snippet; the surrounding text
    indicates it is pydantic's base model class.
    """

    message_id: uuid.UUID

    def dict(self, **kwargs):
        """Return the model as a dict by round-tripping through JSON.

        The instance is serialized with ``self.json()`` and parsed back with
        ``json.loads``, so every value comes back as a JSON-native type.
        ``**kwargs`` is accepted but not used.
        """
        return json.loads(self.json())

当我在MessageHeader上调用dict时,我希望得到一个由字符串字面量组成的字典。期望的结果如下所示:

{'message_id': '383b0bfc-743e-4738-8361-27e6a0753b5a'}

我想避免使用pydantic之类的第三方库,也不想使用json.loads(self.json()),因为这会带来一次额外的序列化/反序列化往返。

是否有更好的方法将数据类转换为上面的字符串字面值的字典?

您可以使用dataclasses.asdict:

from dataclasses import dataclass, asdict
@dataclass
class MessageHeader:
    """Dataclass message header whose ``dict()`` stringifies every value.

    ``dataclasses.asdict`` only accepts dataclass instances, so the class is
    declared with ``@dataclass`` (no third-party base class is needed).
    """

    message_id: uuid.UUID

    def dict(self):
        """Return a ``{field name: str(value)}`` mapping of all fields."""
        return {k: str(v) for k, v in asdict(self).items()}

如果您确定您的类只有字符串值,则可以完全跳过字典推导:

@dataclass
class MessageHeader:
    """Dataclass message header that exposes ``asdict`` directly as ``dict()``.

    No per-value conversion is applied: values are returned with the types
    they are stored as.
    """

    message_id: uuid.UUID

    # Un-annotated class attribute, so it is not a dataclass field; being a
    # plain function it binds as a method, i.e. ``self.dict()`` calls
    # ``asdict(self)``.
    dict = asdict

如果追求绝对纯粹的速度和无穷的效率(足以让查克·诺里斯这样的人物也停下脚步,无可奈何地投来敬畏的目光),我谨推荐下面这个经过精心设计的、基于__dict__的方法:

def dict(self):
    """Return a shallow copy of ``self.__dict__`` with ``message_id`` as ``str``.

    The instance itself is left untouched because the conversion is applied
    to the copy. Raises ``KeyError`` if the instance has no ``message_id``
    entry in its ``__dict__``.
    """
    _dict = self.__dict__.copy()
    _dict['message_id'] = str(_dict['message_id'])
    return _dict

对于定义了__slots__属性的类(例如使用@dataclass(slots=True)的类),上述方法很可能行不通,因为类实例上没有__dict__属性。在这种情况下,下面这种高效的"登月式"方法或许可行:

def dict(self):
    """Return the slot values as a dict, with ``message_id`` converted to ``str``.

    On the first call this generates the source of a specialised ``dict``
    function that reads every name in ``self.__slots__`` directly, installs
    it on the instance's class (replacing this method), and returns its
    result. Later calls on that class go straight to the generated function.
    """
    body_lines = ','.join(
        f"'{f}':" + (f'str(self.{f})' if f == 'message_id' else f'self.{f}')
        for f in self.__slots__
    )
    # Compute the text of the entire function. The newline separates the
    # ``def`` header from its one-statement body.
    txt = f'def dict(self):\n return {{{body_lines}}}'
    ns = {}
    # Use a fresh globals dict: the generated code only needs builtins, and
    # passing locals() would keep this instance alive through the generated
    # function's __globals__.
    exec(txt, {}, ns)
    _dict_fn = self.__class__.dict = ns['dict']
    return _dict_fn(self)

如果现在有人已经激动得坐不住了(我知道,这确实是令人难以置信的突破性成果)——我在下面附上了用timeit模块测得的个人计时结果,希望能让性能方面的情况更加清楚。

仅供参考,使用纯__dict__的方法必然比dataclasses.asdict()快得多。

注意:尽管__dict__在这种特定情况下表现更好,但dataclasses.asdict()可能更适合复合字典,例如含有嵌套数据类的字典,或者值为dict、list等可变类型的字典。

from dataclasses import dataclass, asdict, field
from uuid import UUID, uuid4

class DictMixin:
    """Mixin class to add a `dict()` method on classes that define a __slots__ attribute"""

    def dict(self):
        """Return the slot values as a dict, with ``message_id`` as ``str``.

        The first call generates a specialised ``dict`` function from
        ``self.__slots__``, stores it on the instance's concrete class (so it
        shadows this mixin method from then on) and returns its result.
        """
        body_lines = ','.join(
            f"'{f}':" + (f'str(self.{f})' if f == 'message_id' else f'self.{f}')
            for f in self.__slots__
        )
        # Compute the text of the entire function. The newline separates the
        # ``def`` header from its one-statement body.
        txt = f'def dict(self):\n return {{{body_lines}}}'
        ns = {}
        # Fresh globals: the generated code only needs builtins, and passing
        # locals() would keep this instance alive via the function's globals.
        exec(txt, {}, ns)
        _dict_fn = self.__class__.dict = ns['dict']
        return _dict_fn(self)

@dataclass
class MessageHeader:
    """Benchmark dataclass with three ways of producing a dict.

    All three methods return the field values keyed by field name, with
    ``message_id`` converted to ``str`` and the other values left as stored.
    """

    message_id: UUID = field(default_factory=uuid4)
    string: str = 'a string'
    integer: int = 1000
    floating: float = 1.0

    def dict1(self):
        """Copy ``__dict__`` and stringify ``message_id`` in the copy."""
        _dict = self.__dict__.copy()
        _dict['message_id'] = str(_dict['message_id'])
        return _dict

    def dict2(self):
        """Build the dict with a comprehension over ``__dict__``."""
        return {k: str(v) if k == 'message_id' else v
                for k, v in self.__dict__.items()}

    def dict3(self):
        """Build the dict with a comprehension over ``dataclasses.asdict``."""
        return {k: str(v) if k == 'message_id' else v
                for k, v in asdict(self).items()}

@dataclass(slots=True)
class MessageHeaderWithSlots(DictMixin):
    """Slotted variant of the benchmark dataclass.

    ``dict()`` is inherited from ``DictMixin``; ``dict2()`` is the
    ``dataclasses.asdict``-based implementation used for comparison.
    """

    message_id: UUID = field(default_factory=uuid4)
    string: str = 'a string'
    integer: int = 1000
    floating: float = 1.0

    def dict2(self):
        """Return the fields via ``asdict``, with ``message_id`` as ``str``."""
        return {k: str(v) if k == 'message_id' else v
                for k, v in asdict(self).items()}

if __name__ == '__main__':
    # Benchmark each dict-producing method, then sanity-check that they agree.
    from timeit import timeit

    # These names must be module-level globals: the timed statements below
    # are strings that are evaluated against ``globals()``.
    header = MessageHeader()
    header_with_slots = MessageHeaderWithSlots()

    n = 10000  # number of calls per measurement

    print('dict1():  ', timeit('header.dict1()', number=n, globals=globals()))
    print('dict2():  ', timeit('header.dict2()', number=n, globals=globals()))
    print('dict3():  ', timeit('header.dict3()', number=n, globals=globals()))

    print('slots -> dict():  ', timeit('header_with_slots.dict()', number=n, globals=globals()))
    print('slots -> dict2(): ', timeit('header_with_slots.dict2()', number=n, globals=globals()))

    print()

    dict__ = header.dict1()
    print(dict__)

    asdict__ = header.dict3()
    print(asdict__)

    # Only message_id is stringified; other values keep their types.
    assert isinstance(dict__['message_id'], str)
    assert isinstance(dict__['integer'], int)

    # Every implementation must produce the same mapping.
    assert header.dict1() == header.dict2() == header.dict3()
    assert header_with_slots.dict() == header_with_slots.dict2()

我的Mac M1笔记本电脑的测试结果:

dict1():   0.005992999998852611
dict2():   0.00800508284009993
dict3():   0.07069579092785716
slots -> dict():   0.00583599996753037
slots -> dict2():  0.07395245810039341
{'message_id': 'b4e17ef9-1a58-4007-9cef-39158b094da2', 'string': 'a string', 'integer': 1000, 'floating': 1.0}
{'message_id': 'b4e17ef9-1a58-4007-9cef-39158b094da2', 'string': 'a string', 'integer': 1000, 'floating': 1.0}

注意:如需更"完整"的DictMixin实现(命名为SerializableMixin),请查看我另外添加的相关答案。

受@rv.kvetch的回答启发,我编写了这个装饰器,它会根据类定义动态生成asdict方法的代码。它还支持子类化,也就是说子类会继承超类的属性。

装饰器:

import typing

def generate_dict_method(
    __source: typing.Literal["slots", "annotations"],
    __name: str,
    /,
    **custom_mappings: typing.Callable[[typing.Any], typing.Any]
):
    """Build a class decorator that attaches a generated "to dict" method.

    The decorator collects attribute names from every class in the decorated
    class's MRO, generates the source of a method returning
    ``{"attr": self.attr, ...}``, compiles it with ``exec`` and sets it on
    the class under ``__name``.

    Args:
        __source: Where attribute names are read from: ``"slots"`` uses each
            class's ``__slots__``, ``"annotations"`` uses the keys of each
            class's ``__annotations__``.
        __name: Name of the method to generate and attach.
        **custom_mappings: Per-attribute converters, keyed by attribute name.
            When given, the generated method emits ``converter(self.attr)``
            instead of ``self.attr`` for that attribute.

    Returns:
        A decorator that adds the method to a class and returns the class.

    Raises:
        NotImplementedError: On decoration, if ``__source`` is not one of the
            supported values.
        RuntimeError: On decoration, if no attribute names are found.
    """

    def decorator(cls):
        # Pick the class-level attribute that lists the instance attributes.
        if __source == 'annotations':
            listing = "__annotations__"
        elif __source == "slots":
            listing = "__slots__"
        else:
            raise NotImplementedError(__source)

        attributes = set()
        for mc in cls.__mro__:
            attrs = getattr(mc, listing, None)
            # ``__slots__ = 'name'`` declares one slot; wrap it so the string
            # is not split into single characters by ``set.update``.
            if isinstance(attrs, str):
                attrs = (attrs,)
            if attrs:
                # Iterating a mapping yields its keys, which covers both
                # ``__annotations__`` and a dict-valued ``__slots__``.
                attributes.update(attrs)

        if not attributes:
            raise RuntimeError(
                f"Unable to generate `{__name}` method for `{cls.__qualname__}` class: "
                "no attributes found."
            )

        # The converters are exposed to the generated code as its globals,
        # each under a derived name.
        funclocals = {}
        mapping_to_funcname = {}
        for attrname, f in custom_mappings.items():
            funcname = f'__parse_{attrname}'
            funclocals[funcname] = f
            mapping_to_funcname[attrname] = funcname

        body_lines = ','.join([
            f'"{attrname}": ' + (f'self.{attrname}' if attrname not in custom_mappings
                                 else f'{mapping_to_funcname[attrname]}(self.{attrname})')
            for attrname in attributes
        ])
        # The newline separates the ``def`` header from its one-line body.
        txt = f'def {__name}(self):\n return {{{body_lines}}}'
        d = {}
        exec(txt, funclocals, d)
        setattr(cls, __name, d[__name])
        return cls

    return decorator

用法:


from dataclasses import dataclass
import json

@dataclass(slots=True, kw_only=True)
class TestBase:
i1: int
i2: int

# Decorators apply bottom-up: the class is turned into a dataclass first,
# then the `asdict` method is generated from the annotations found along
# its MRO. The `d` field is passed through the custom converter.
@generate_dict_method("annotations", "asdict", d=(lambda x: "FUNNY" + json.dumps(x) + "JSON"))
@dataclass(slots=True, kw_only=True)
class Test(TestBase):
    """Example subclass adding four fields to those of ``TestBase``."""

    i: int
    b: bool
    s: str
    d: dict

# Build an instance, passing every field by keyword, and print the dict
# returned by its `asdict` method.
a = Test(i=1, b=True, s="test", d={"test": "test"}, i1=2, i2=3)
print(a.asdict())
输出:

{'d': 'FUNNY{"test": "test"}JSON', 'i': 1, 'i1': 2, 'b': True, 's': 'test', 'i2': 3}

可以看到,您只需通过**custom_mappings参数,以属性名作为关键字,为该属性提供一个自定义解析函数。这样您就可以按任何合适的方式转换该属性。

在您的例子中,您可以为message_id属性提供str函数。

这是在Google上搜索"dataclass to dict"时会得到的结果,而上面的答案都太复杂了。您可能要找的是这个:

from dataclasses import dataclass
@dataclass
class MessageHeader:
    """Minimal dataclass with a single string field."""

    uuid: str = "abcd"


# For a dataclass without __slots__, the instance __dict__ already is the
# field-name -> value mapping.
vars(MessageHeader())  # or MessageHeader().__dict__

相关内容

  • 没有找到相关文章