问题
- 当状态由数据类的元类生成时,如何以保存其状态的方式序列化数据类的实例?
- 请参阅说明和示例代码以获得更多说明
- 如何在 Python 中以复现 working-case 行为的方式共享上下文,同时保留 sample-issue 情况下的模块化结构?
- 参考:上下文共享引用
- 有没有一种不使用元类的抽象方式来填充数据类的替代方法?
- 需要进行抽象,因为我有多个代表请求信息的数据类,它们都以类似的方式填充
描述
我目前正在编写一个 Flask Web 应用程序,该应用程序通过 Redis 队列(使用 rq)将任务处理传递给 rq 工作进程(worker),并且我在序列化包含请求信息的数据类时遇到了问题。当 rq 试图序列化一个使用元类填充其所有字段的数据类时,这个问题就会出现。
在调试和诊断该问题时,我注意到:当实例的创建与类的定义位于同一文件/模块中时,dill 和 cloudpickle 能够正确地序列化对象的实例;但一旦我将类的定义移动到另一个文件/模块,它们就无法在保留实例状态的前提下序列化该对象。
我在下面添加了一个简化的示例来复制我的问题。
环境
Python Version: python 3.7.3
OS: Windows
# File structure
sample-issue
--> dataclass.py
--> serialization.py
--> deserialization.py
--> __init__.py
working-case
--> sample.py
--> __init__.py
示例问题(sample-issue)
# dataclass.py
from dataclasses import dataclass, field
from typing import List, Dict, Any
import json
from collections import defaultdict
class JSONRequest(type):
    def __call__(cls, *args, **kwargs):
        """
        Populate the dataclass from its ``inp`` provider and return the class.

        Calling a class built with this metaclass does not create an instance:
        the field values are written onto the class and the class object itself
        is handed back.

        NOTE: This metaclass only works with dataclasses, since it reads
        ``__dataclass_fields__``.

        Class attribute used:
            inp: callable stored on the class itself that produces a dictionary
        """
        provider = cls.__dict__["inp"]
        cls.__initialize(provider())
        return cls

    def __initialize(cls, inp: dict) -> None:
        """
        Copy the values of ``inp`` onto the class, one dataclass field at a time.

        A falsy or absent value falls back to the attribute already present on
        the class; when the class has no such attribute a ValueError is raised.

        Parameters:
            inp: Json input
        """
        # Missing keys read as None, exactly like a key holding a falsy value.
        payload = defaultdict(lambda: None)
        payload.update(inp)
        own_attrs = cls.__dict__
        for field_name in cls.__dataclass_fields__:
            supplied = payload[field_name]
            if supplied:
                setattr(cls, field_name, supplied)
                continue
            if field_name not in own_attrs:
                raise ValueError(f"Request is missing the {field_name} field")
            setattr(cls, field_name, own_attrs[field_name])

    def __str__(cls):
        """Render the current field values as an indented JSON document."""
        snapshot = {}
        for field_name in cls.__dataclass_fields__:
            snapshot[field_name] = getattr(cls, field_name)
        return json.dumps(snapshot, indent=4)
def generate_input():
    """
    Stub that stands in for a real request body.

    Returns a freshly built dictionary with the hard-coded sample payload.
    """
    recipients = []
    for user in ("peter", "mark", "alysa"):
        recipients.append(f"{user}@yahoo.com")
    details = {"time": 1619628747.9166002, "count": 3}
    return {
        "email_list": recipients,
        "message": "Foo bar fizzbuzz",
        "subject": "Sample Issue",
        "info": details,
    }
# Request payload describing an email chain; its values are filled in by the
# JSONRequest metaclass when the class is called.
@dataclass
class EmailChain(metaclass=JSONRequest):
    # All fields are excluded from the generated __init__ and carry no default,
    # so each one must be supplied by the input dictionary.
    email_list: List[str] = field(init=False)
    message: str = field(init=False)
    subject: str = field(init=False)
    info: Dict[str, Any] = field(init=False)
    # Not annotated, hence not a dataclass field: the callable JSONRequest
    # invokes to obtain the input dictionary.
    inp = generate_input
# serialization.py
import dill
from sample_issue.dataclass import EmailChain
# Calling the class goes through JSONRequest.__call__, which fills in the class
# attributes and returns the class object itself (not an instance).
obj = EmailChain()
# Serialize that object to bytes with dill.
data_stream = dill.dumps(obj)
print(data_stream)
# output: b'\x80\x03csrc.Test\nEmailChain\nq\x00.'
# NOTE(review): the escape backslashes above are restored on the assumption that
# they were lost when this post was extracted. The stream names only a module
# path and the class name; none of the field values appear in it.
# deserialization
import dill
from sample_issue.dataclass import EmailChain
# Byte stream printed by serialization.py. The name no longer shadows the
# builtin ``input``, and the escape sequences are written out so the literal is
# a well-formed pickle stream (PROTO 3, GLOBAL "src.Test EmailChain", STOP).
# NOTE(review): the stream refers to the module ``src.Test`` while this sample
# imports from ``sample_issue.dataclass`` -- confirm which layout is intended.
data_stream = b'\x80\x03csrc.Test\nEmailChain\nq\x00.'
# Only a reference to the class is stored in the stream, so loading it yields
# the class as defined in the importing process.
obj = dill.loads(data_stream)
print(obj)
# Results in error since obj is missing data class fields for __str__ method
工作案例
# Working-case
import dill
from dataclasses import dataclass, field
from typing import List, Dict, Any
import json
from collections import defaultdict
class JSONRequest(type):
    def __call__(cls, *args, **kwargs):
        """
        Populate the dataclass from its ``inp`` provider and return the class.

        Calling a class built with this metaclass does not create an instance:
        the field values are written onto the class and the class object itself
        is handed back.

        NOTE: This metaclass only works with dataclasses, since it reads
        ``__dataclass_fields__``.

        Class attribute used:
            inp: callable stored on the class itself that produces a dictionary
        """
        provider = cls.__dict__["inp"]
        cls.__initialize(provider())
        return cls

    def __initialize(cls, inp: dict) -> None:
        """
        Copy the values of ``inp`` onto the class, one dataclass field at a time.

        A falsy or absent value falls back to the attribute already present on
        the class; when the class has no such attribute a ValueError is raised.

        Parameters:
            inp: Json input
        """
        # Missing keys read as None, exactly like a key holding a falsy value.
        payload = defaultdict(lambda: None)
        payload.update(inp)
        own_attrs = cls.__dict__
        for field_name in cls.__dataclass_fields__:
            supplied = payload[field_name]
            if supplied:
                setattr(cls, field_name, supplied)
                continue
            if field_name not in own_attrs:
                raise ValueError(f"Request is missing the {field_name} field")
            setattr(cls, field_name, own_attrs[field_name])

    def __str__(cls):
        """Render the current field values as an indented JSON document."""
        snapshot = {}
        for field_name in cls.__dataclass_fields__:
            snapshot[field_name] = getattr(cls, field_name)
        return json.dumps(snapshot, indent=4)
def generate_input():
    """
    Stub that stands in for a real request body.

    Returns a freshly built dictionary with the hard-coded sample payload.
    """
    recipients = []
    for user in ("peter", "mark", "alysa"):
        recipients.append(f"{user}@yahoo.com")
    details = {"time": 1619628747.9166002, "count": 3}
    return {
        "email_list": recipients,
        "message": "Foo bar fizzbuzz",
        "subject": "Sample Issue",
        "info": details,
    }
# Request payload describing an email chain; its values are filled in by the
# JSONRequest metaclass when the class is called.
@dataclass
class EmailChain(metaclass=JSONRequest):
    # All fields are excluded from the generated __init__ and carry no default,
    # so each one must be supplied by the input dictionary.
    email_list: List[str] = field(init=False)
    message: str = field(init=False)
    subject: str = field(init=False)
    info: Dict[str, Any] = field(init=False)
    # Not annotated, hence not a dataclass field: the callable JSONRequest
    # invokes to obtain the input dictionary.
    inp = generate_input
# Calling the class goes through JSONRequest.__call__, which fills in the class
# attributes and returns the class object itself (not an instance).
obj = EmailChain()
# Here the class definition and this call live in the same module, unlike the
# sample-issue layout where they are in separate files.
data_stream = dill.dumps(obj)
print(data_stream)
# output: b'x80x03cdill._dilln_create_typenqx00(hx00(cdill._dilln_load_typenqx01Xx04x00x00x00typeqx02x85qx03Rqx04Xx0bx00x00x00JSONRequestqx05hx04x85qx06}qx07(Xnx00x00x00__module__qx08Xx08x00x00x00__main__qtXx08x00x00x00__call__qncdill._dilln_create_functionnqx0b(cdill._dilln_create_codenqx0c(Kx01Kx00Kx04Kx03KOCx1a|x00jx00dx01x19x00x83x00}x03|x00xa0x01|x03xa1x01x01x00|x00Sx00qrXxf5x00x00x00n This is a metaclass used to autonomously populate dataclassesnn NOTE: This metaclass only works with dataclassesnn Optional Parameter:n inp: class attribute that is a callable that produces a dictionaryn qx0eXx03x00x00x00inpqx0fx86qx10Xx08x00x00x00__dict__qx11Xx18x00x00x00_JSONRequest__initializeqx12x86qx13(Xx03x00x00x00clsqx14Xx04x00x00x00argsqx15Xx06x00x00x00kwargsqx16hx0ftqx17XLx00x00x00C:/Users/739908/Work/systems_automation/report_automation_engine/src/temp.pyqx18hnKnCx06x00tx0cx01nx01qx19))tqx1aRqx1bc__builtin__n__main__nhnNN}qx1cNtqx1dRqx1ehx12hx0b(hx0c(Kx02Kx00Kx06Kx05KCCvtx00dx01dx02x84x00x83x01}x02|x02xa0x01|x01xa1x01x01x00xZ|x00jx02xa0x03xa1x00Dx00]L\x02}x03}x04|x02|x03x19x00sP|x03|x00jx04xa0x05xa1x00kx07rPtx06dx03|x03x9bx00dx04x9dx03x83x01x82x01|x02|x03x19x00p`|x00jx04|x03x19x00}x05tx07|x00|x03|x05x83x03x01x00q"Wx00dx05Sx00qx1f(Xxacx01x00x00n Initializes all of the dataclasses fieldsnn If the field is missing in the JSON request and it does not have a default value in the data class an ValueError error will be raised. 
Additionally if the Json value is [], {}, "" will default to the defaultn value, and if the default value is missing an InvalidRequest error will also be raised.nn Parameters:n inp: Json inputn q hx0c(Kx00Kx00Kx00Kx01KSCx04dx00Sx00q!Nx85q"))hx18Xx08x00x00x00<lambda>q#K"Cx00q$))tq%Rq&X*x00x00x00JSONRequest.__initialize.<locals>.<lambda>q'Xx17x00x00x00Request is missing the q(Xx06x00x00x00 fieldq)Ntq*(Xx0bx00x00x00defaultdictq+Xx06x00x00x00updateq,Xx14x00x00x00__dataclass_fields__q-Xx05x00x00x00itemsq.hx11Xx04x00x00x00keysq/Xnx00x00x00ValueErrorq0Xx07x00x00x00setattrq1tq2(hx14hx0fXx05x00x00x00_jsonq3Xx04x00x00x00nameq4Xx01x00x00x00_q5Xx05x00x00x00valueq6tq7hx18Xx0cx00x00x00__initializeq8Kx17Cx0ex00x0bx0cx01nx02x14x01x16x01x10x02x12x01q9))tq:Rq;c__builtin__n__main__nh8NN}q<Ntq=Rq>Xx07x00x00x00__str__q?hx0b(hx0c(Kx01Kx00Kx02Kx04Kx03C&x87x00fx01dx01dx02x84x08x88x00jx00xa0x01xa1x00Dx00x83x01}x01tx02jx03|x01dx03dx04x8dx02Sx00q@(Nhx0c(Kx01Kx00Kx03Kx05Kx13Cx1cix00|x00]x14\x02}x01}x02tx00x88x00|x01x83x02|x01x93x02qx04Sx00qA)Xx07x00x00x00getattrqBx85qCXx02x00x00x00.0qDh4h5x87qEhx18Xnx00x00x00<dictcomp>qFK-Cx02x06x00qGhx14x85qH)tqIRqJX'x00x00x00JSONRequest.__str__.<locals>.<dictcomp>qKKx04Xx06x00x00x00indentqLx85qMtqN(h-h.Xx04x00x00x00jsonqOXx05x00x00x00dumpsqPtqQhx14Xx03x00x00x00repqRx86qShx18h?K,Cx04x00x01x18x01qT)hx14x85qUtqVRqWc__builtin__n__main__nh?NN}qXNtqYRqZXx07x00x00x00__doc__q[Nutq\Rq]Xnx00x00x00EmailChainq^hx01Xx06x00x00x00objectq_x85q`Rqax85qb}qc(hx08htXx0fx00x00x00__annotations__qd}qe(Xnx00x00x00email_listqfcdill._dilln_get_attrnqgcdill._dilln_import_modulenqhXtx00x00x00_operatorqix85qjRqkXx07x00x00x00getitemqlx86qmRqnctypingnListnqohx01Xx03x00x00x00strqpx85qqRqrx86qsRqtXx07x00x00x00messagequhrXx07x00x00x00subjectqvhrXx04x00x00x00infoqwhnctypingnDictnqxhrctypingnAnynqyx86qzx86q{Rq|uhx0fhx0b(hx0c(Kx00Kx00Kx00Kx06KCCx1edx01dx02x84x00dx03Dx00x83x01dx04dx05dx06dx07dx08x9cx02dtx9cx04Sx00q}(X*x00x00x00n Stub method for generating inputn 
q~hx0c(Kx01Kx00Kx02Kx04KSCx16gx00|x00]x0e}x01|x01x9bx00dx00x9dx02x91x02qx04Sx00qx7fXnx00x00x00@yahoo.comqx80x85qx81)hDh4x86qx82hx18Xnx00x00x00<listcomp>qx83K6Cx02x06x00qx84))tqx85Rqx86X"x00x00x00generate_input.<locals>.<listcomp>qx87Xx05x00x00x00peterqx88Xx04x00x00x00markqx89Xx05x00x00x00alysaqx8ax87qx8bXx10x00x00x00Foo bar fizzbuzzqx8cXx0cx00x00x00Sample Issueqx8dGAxd8"dxb2xfaxa9x94Kx03Xx04x00x00x00timeqx8eXx05x00x00x00countqx8fx86qx90(hfhuhvhwtqx91tqx92))hx18Xx0ex00x00x00generate_inputqx93K1Cnx00x05x0cx01x02x01x02x02x02x01qx94))tqx95Rqx96c__builtin__n__main__nhx93NN}qx97Ntqx98Rqx99h[Xx1bx00x00x00EmailChain(*args, **kwargs)qx9aXx14x00x00x00__dataclass_params__qx9bcdataclassesn_DataclassParamsnqx9c)x81qx9dN}qx9e(Xx04x00x00x00initqx9fx88Xx04x00x00x00reprqxa0x88Xx02x00x00x00eqqxa1x88Xx05x00x00x00orderqxa2x89Xx0bx00x00x00unsafe_hashqxa3x89Xx06x00x00x00frozenqxa4x89ux86qxa5bh-}qxa6(hfcdataclassesnFieldnqxa7)x81qxa8N}qxa9(h4hfhx02htXx07x00x00x00defaultqxaacdataclassesn_MISSING_TYPEnqxab)x81qxacXx0fx00x00x00default_factoryqxadhxachxa0x88Xx04x00x00x00hashqxaeNhx9fx89Xx07x00x00x00compareqxafx88Xx08x00x00x00metadataqxb0hx01Xx10x00x00x00MappingProxyTypeqxb1x85qxb2Rqxb3}qxb4x85qxb5Rqxb6Xx0bx00x00x00_field_typeqxb7cdataclassesn_FIELD_BASEnqxb8)x81qxb9}qxbah4Xx06x00x00x00_FIELDqxbbsbux86qxbcbhuhxa7)x81qxbdN}qxbe(h4huhx02hrhxaahxachxadhxachxa0x88hxaeNhx9fx89hxafx88hxb0hxb6hxb7hxb9ux86qxbfbhvhxa7)x81qxc0N}qxc1(h4hvhx02hrhxaahxachxadhxachxa0x88hxaeNhx9fx89hxafx88hxb0hxb6hxb7hxb9ux86qxc2bhwhxa7)x81qxc3N}qxc4(h4hwhx02h|hxaahxachxadhxachxa0x88hxaeNhx9fx89hxafx88hxb0hxb6hxb7hxb9ux86qxc5buXx08x00x00x00__init__qxc6hx0b(hx0c(Kx01Kx00Kx01Kx01KCCx04dx00Sx00qxc7Nx85qxc8)Xx04x00x00x00selfqxc9x85qxcaXx08x00x00x00<string>qxcbhxc6Kx01Cx02x00x01qxcc))tqxcdRqxce}qxcf(Xx07x00x00x00MISSINGqxd0hxacXx14x00x00x00_HAS_DEFAULT_FACTORYqxd1cdataclassesn_HAS_DEFAULT_FACTORY_CLASSnqxd2)x81qxd3Xx0cx00x00x00__builtins__qxd4hhXx08x00x00x00builtinsqxd5x85qxd6Rqxd7uhxc6NN}qxd8Ntqxd9RqxdaXx08x00x00x00__repr__qxdbh
x0b(hx0c(Kx01Kx00Kx03KtKx13CDtx00|x00x83x01tx01xa0x02xa1x00fx02}x01|x01x88x00kx06rx1cdx01Sx00x88x00xa0x03|x01xa1x01x01x00zx0cx88x01|x00x83x01}x02Wx00dx00x88x00xa0x04|x01xa1x01x01x00Xx00|x02Sx00qxdcNXx03x00x00x00...qxddx86qxde(Xx02x00x00x00idqxdfXx07x00x00x00_threadqxe0Xtx00x00x00get_identqxe1Xx03x00x00x00addqxe2Xx07x00x00x00discardqxe3tqxe4hxc9Xx03x00x00x00keyqxe5Xx06x00x00x00resultqxe6x87qxe7X'x00x00x00C:\DevApps\Python3.7\lib\dataclasses.pyqxe8Xx07x00x00x00wrapperqxe9M^x01Cx10x00x02x10x01x08x01x04x01nx01x02x01x0cx02x0cx01qxeaXx0cx00x00x00repr_runningqxebXrx00x00x00user_functionqxecx86qxed)tqxeeRqxefcdataclassesn__dict__nhxdbNcdill._dilln_create_cellnqxf0hx01Xx03x00x00x00setqxf1x85qxf2Rqxf3]qxf4x85qxf5Rqxf6x85qxf7Rqxf8hxf0hx0b(hx0c(Kx01Kx00Kx01KnKCC.|x00jx00jx01dx01|x00jx02x9bx02dx02|x00jx03x9bx02dx03|x00jx04x9bx02dx04|x00jx05x9bx02dx05x9dtx17x00Sx00qxf9(NXx0cx00x00x00(email_list=qxfaXnx00x00x00, message=qxfbXnx00x00x00, subject=qxfcXx07x00x00x00, info=qxfdXx01x00x00x00)qxfetqxff(Xtx00x00x00__class__rx00x01x00x00Xx0cx00x00x00__qualname__rx01x01x00x00hfhuhvhwtrx02x01x00x00hxc9x85rx03x01x00x00hxcbhxdbKx01Cx02x00x01rx04x01x00x00))trx05x01x00x00Rrx06x01x00x00cdataclassesn__dict__nhxdbNN}rx07x01x00x00Ntrx08x01x00x00Rrtx01x00x00x85rnx01x00x00Rrx0bx01x00x00x86rx0cx01x00x00}rrx01x00x00Xx0bx00x00x00__wrapped__rx0ex01x00x00jtx01x00x00sNtrx0fx01x00x00Rrx10x01x00x00Xx06x00x00x00__eq__rx11x01x00x00hx0b(hx0c(Kx02Kx00Kx02Kx05KCC8|x01jx00|x00jx00kx08r4|x00jx01|x00jx02|x00jx03|x00jx04fx04|x01jx01|x01jx02|x01jx03|x01jx04fx04kx02Sx00tx05Sx00rx12x01x00x00Nx85rx13x01x00x00(jx00x01x00x00hfhuhvhwXx0ex00x00x00NotImplementedrx14x01x00x00trx15x01x00x00hxc9Xx05x00x00x00otherrx16x01x00x00x86rx17x01x00x00hxcbjx11x01x00x00Kx01Cx06x00x01x0cx01(x01rx18x01x00x00))trx19x01x00x00Rrx1ax01x00x00cdataclassesn__dict__njx11x01x00x00NN}rx1bx01x00x00Ntrx1cx01x00x00Rrx1dx01x00x00Xx08x00x00x00__hash__rx1ex01x00x00Nhf]rx1fx01x00x00(Xx0fx00x00x00peter@yahoo.comr 
x01x00x00Xx0ex00x00x00mark@yahoo.comr!x01x00x00Xx0fx00x00x00alysa@yahoo.comr"x01x00x00ehuhx8chvhx8dhw}r#x01x00x00(hx8eGAxd8"dxb2xfaxa9x94hx8fKx03uutr$x01x00x00Rr%x01x00x00.'
# Round trip: rebuild the object from the bytes produced above and print it.
c = dill.loads(data_stream)
print(c)
# The bare string below is not executed logic; it records the printed output.
"""
output:
{
"email_list": [
"peter@yahoo.com",
"mark@yahoo.com",
"alysa@yahoo.com"
],
"message": "Foo bar fizzbuzz",
"subject": "Sample Issue",
"info": {
"time": 1619628747.9166002,
"count": 3
}
}
"""
我已经解决了这个问题:dill 需要为该元类注册类型(type registration),才能对使用它的类进行序列化(marshal)。下面是更新后的元类以及对应的 dill 类型注册代码。
更新的元类
from typing import Dict, Tuple
from core.util.errors import OptionalModuleError, InvalidRequest
import json
from collections import defaultdict
try:
from flask import request
except ImportError:
raise OptionalModuleError("metaclasses.JSONRequest", ["flask"])
class JSONRequest(type):
    """
    This is a metaclass used to autonomously populate dataclasses with the request data coming in to flask

    Calling a class that uses this metaclass does not create an instance: the
    field values are stored as attributes on the class and the class itself is
    returned.

    NOTE: This metaclass only works with dataclasses

    Input resolution order:
        1. the ``inp`` keyword argument (a dictionary), when given
        2. the ``inp`` class attribute (a callable that produces a dictionary)
        3. ``request.json`` of the current flask request
    """

    def __call__(cls, *args, **kwargs):
        """Resolve the input dictionary, populate the class and return it."""
        if "inp" in kwargs:
            inp = kwargs["inp"]
        elif "inp" in cls.__dict__:
            inp = cls.__dict__["inp"]()
        else:
            inp = request.json
        cls.__initialize(inp)
        return cls

    def __initialize(cls, inp: Dict) -> None:
        """
        Initializes all of the dataclasses fields

        A value counts as blank when it is absent, ``None``, ``[]``, ``{}`` or
        ``""``. A blank value keeps the attribute already present on the class
        (its default); legitimate falsy values such as ``0`` and ``False`` are
        stored as given.

        Parameters:
            inp: Json input; ``None`` is treated as an empty request

        Raises:
            InvalidRequest: a field is blank and the class has no value for it
        """
        # Named ``payload`` so the ``json`` module is not shadowed.
        payload = dict(inp) if inp else {}
        for name in cls.__dataclass_fields__:
            value = payload.get(name)
            blank = value is None or (
                isinstance(value, (str, list, dict)) and not value
            )
            if not blank:
                setattr(cls, name, value)
            elif name not in cls.__dict__:
                raise InvalidRequest(f"Request is missing the {name} field")
            # else: blank value, the attribute already on the class stays.

    def _get_fields(cls) -> Dict:
        """
        This method returns all of the dataclasses fields

        Returns:
            Dict/JSON representation of the dataclasses attributes
        """
        return {name: getattr(cls, name) for name in cls.__dataclass_fields__}

    def get_info(cls) -> Tuple[str, str, Dict]:
        """
        This method returns all the needed information to reconstruct/deserialize the class

        Returns:
            Tuple containing the dataclass' module location and name for importing it when deserializing and the
            input used to repopulate the class to its last state
        """
        return cls.__module__, cls.__name__, cls._get_fields()

    def __str__(cls) -> str:
        """
        Method to print the request nicely

        Returns:
            The str representation of the class
        """
        return json.dumps(cls._get_fields(), indent=4)
Dill类型注册
def recreate_request(mod_name: str, class_name: str, inp: Dict) -> "JSONRequest":
    """
    Method to rebuild the serialized request

    Parameters:
        mod_name: The name of the dataclass' module
        class_name: The name of the dataclass
        inp: The state of the dataclass' member variables

    Returns:
        The deserialized JSON Request with its last known state

    Raises:
        ImportError: the module cannot be imported
        AttributeError: the module has no attribute named ``class_name``
    """
    # Imported locally so this function does not depend on a file-level import.
    import importlib

    # import_module returns the named (leaf) module itself, which is what the
    # former ``__import__(mod_name, fromlist=[class_name])`` call produced.
    mod = importlib.import_module(mod_name)
    klass = getattr(mod, class_name)
    # The saved field values are passed back through the ``inp`` keyword.
    return klass(inp=inp)
@dill.register(JSONRequest)
def reduce_request(pickler, obj):
    """
    Reducer registered with dill for every class whose metaclass is JSONRequest

    The class is saved as a call to recreate_request with the values returned
    by ``obj.get_info()``.

    Parameter:
        pickler: The pickler from the dill library
        obj: The dataclass being serialized
    """
    module_name, class_name, field_values = obj.get_info()
    pickler.save_reduce(
        recreate_request,
        (module_name, class_name, field_values),
        obj=obj,
    )