如何注释返回类型取决于其参数的函数?



在Python中,我经常编写过滤集合以查找特定子类型实例的函数。例如,我可能会在DOM中查找特定类型的节点或在日志中查找特定类型的事件:

def find_pre(soup: TagSoup) -> List[tags.pre]:
    """Find all <pre> nodes in `soup`."""
    ...
def filter_errors(log: List[LogEvent]) -> List[LogError]:
    """Keep only errors from `log`."""
    ...

为这些函数编写类型很容易。但是,如果这些函数的泛型版本接受一个参数来指定要返回的类型,情况又会如何呢?

def find_tags(tag_soup: TagSoup, T: type) -> List[T]:
    """Find all nodes of type `T` in `tag_soup`."""
    # NOTE: the return annotation is deliberately left as written: `T` is a
    # runtime parameter, so referring to it in `List[T]` is not valid typing.
    ...
def filter_errors(log: List[LogEvent], T: type) -> List[T]:
    """Keep only events of type `T` from `log`."""
    # NOTE: the return annotation is deliberately left as written: `T` is a
    # runtime parameter, so referring to it in `List[T]` is not valid typing.
    ...

(上面的签名是错误的:我不能在返回类型中引用T)

这是一个相当常见的设计:docutils 有 node.traverse(T: type),BeautifulSoup 有 soup.find_all(),等等。当然,它可以变得任意复杂,但是Python类型注释可以处理像上面这样的简单情况吗?

下面是一个非常具体的MWE:

from dataclasses import dataclass
from typing import *
@dataclass
class Packet:
    """Common base class of everything a `Stream` can return."""


@dataclass
class Done(Packet):
    """Packet that makes `stream_response` stop iterating."""


@dataclass
class Exn(Packet):
    """Packet carrying an exception text and a pair of integers."""

    exn: str
    # NOTE(review): presumably a source position; meaning of the pair not shown here.
    loc: Tuple[int, int]


@dataclass
class Message(Packet):
    """Packet carrying an integer reference and a message text."""

    ref: int
    msg: str


# A stream is a zero-argument callable returning the next packet, or None.
Stream = Callable[[], Union[Packet, None]]
def stream_response(stream: Stream, types) -> Iterator[??]:
while response := stream():
if isinstance(response, Done): return
if isinstance(response, types): yield response
def print_messages(stream: Stream):
    """Print the `msg` field of every `Message` packet read from `stream`."""
    for m in stream_response(stream, Message):
        print(m.msg) # Error: Cannot access member "msg" for "Packet"
# Demo input: one message, one exception, then the terminating `Done` packet.
msgs = iter((Message(0, "hello"), Exn("Oops", (1, 42)), Done()))
# The `None` default keeps the callable within the `Stream` contract
# (packet or None) instead of raising StopIteration once `msgs` is exhausted.
print_messages(lambda: next(msgs, None))

Pyright说:

29:17 - error: Cannot access member "msg" for type "Packet"
Member "msg" is unknown (reportGeneralTypeIssues)

在上面的例子中,有没有一种方法来注释stream_response,以便Python类型检查器将接受print_messages的定义?

好了,开始吧。它通过了 mypy --strict,但并不漂亮。

这是怎么回事

对于给定的类A,我们知道A的实例的类型是A(显然)。但是A本身是什么类型的呢?从技术上讲,A的类型是type,因为所有不使用元类的Python类都是type的实例。然而,用type注释参数并不能告诉类型检查器太多信息。在Python类型注解中,表示在类型层次结构中"向上一层"的写法是Type[A]。因此,如果我们有一个函数myfunc,它返回作为参数传入的类的一个实例,我们可以相当简单地注释如下:

from typing import TypeVar, Type
# Links the class passed in to the type of the instance returned.
T = TypeVar('T')


def myfunc(some_class: Type[T]) -> T:
    """Return a new instance of `some_class`, created with no arguments."""
    # do some stuff
    return some_class()
然而,你的情况要复杂得多。你可以传入一个类作为参数,也可以传入两个类、三个类……等等。我们可以使用typing.overload来解决这个问题,它允许我们为给定的函数注册多个签名。这些签名在运行时完全被忽略;它们纯粹是给类型检查器用的;因此,这些函数的主体可以留空。一般来说,用@overload修饰的函数体中只放置文档字符串或字面省略号(...)。

据我所知,没有办法把这些重载函数泛化,因此可以传递给types参数的元素个数上限就很重要:您必须冗长地逐一列举函数的每一个可能的签名。如果您沿着这条路走下去,您可能需要考虑将@overload签名移动到一个单独的.pyi存根文件中。

from dataclasses import dataclass
from typing import (
Callable,
Tuple,
Union,
Iterator,
overload,
TypeVar,
Type, 
Sequence
)
@dataclass
class Packet:
    """Common base class of everything a `Stream` can return."""


# One type variable per position of the `types` tuple accepted by the
# `stream_response` overloads; each one is bounded by `Packet`.
P1 = TypeVar('P1', bound=Packet)
P2 = TypeVar('P2', bound=Packet)
P3 = TypeVar('P3', bound=Packet)
P4 = TypeVar('P4', bound=Packet)
P5 = TypeVar('P5', bound=Packet)
P6 = TypeVar('P6', bound=Packet)
P7 = TypeVar('P7', bound=Packet)
P8 = TypeVar('P8', bound=Packet)
P9 = TypeVar('P9', bound=Packet)
P10 = TypeVar('P10', bound=Packet)


@dataclass
class Done(Packet):
    """Packet that makes `stream_response` stop iterating."""


@dataclass
class Exn(Packet):
    """Packet carrying an exception text and a pair of integers."""

    exn: str
    # NOTE(review): presumably a source position; meaning of the pair not shown here.
    loc: Tuple[int, int]


@dataclass
class Message(Packet):
    """Packet carrying an integer reference and a message text."""

    ref: int
    msg: str


# A stream is a zero-argument callable returning the next packet, or None.
Stream = Callable[[], Union[Packet, None]]
@overload
def stream_response(stream: Stream, types: Type[P1]) -> Iterator[P1]:
    """Signature if exactly one type is passed in for the `types` parameter"""


@overload
def stream_response(
    stream: Stream,
    types: Tuple[Type[P1], Type[P2]]
) -> Iterator[Union[P1, P2]]:
    """Signature if exactly two types are passed in for the `types` parameter"""


@overload
def stream_response(
    stream: Stream,
    types: Tuple[Type[P1], Type[P2], Type[P3]]
) -> Iterator[Union[P1, P2, P3]]:
    """Signature if exactly three types are passed in for the `types` parameter"""


@overload
def stream_response(
    stream: Stream,
    types: Tuple[Type[P1], Type[P2], Type[P3], Type[P4]]
) -> Iterator[Union[P1, P2, P3, P4]]:
    """Signature if exactly four types are passed in for the `types` parameter"""


@overload
def stream_response(
    stream: Stream,
    types: Tuple[Type[P1], Type[P2], Type[P3], Type[P4], Type[P5]]
) -> Iterator[Union[P1, P2, P3, P4, P5]]:
    """Signature if exactly five types are passed in for the `types` parameter"""


@overload
def stream_response(
    stream: Stream,
    types: Tuple[Type[P1], Type[P2], Type[P3], Type[P4], Type[P5], Type[P6]]
) -> Iterator[Union[P1, P2, P3, P4, P5, P6]]:
    """Signature if exactly six types are passed in for the `types` parameter"""


@overload
def stream_response(
    stream: Stream,
    types: Tuple[
        Type[P1],
        Type[P2],
        Type[P3],
        Type[P4],
        Type[P5],
        Type[P6],
        Type[P7]
    ]
) -> Iterator[Union[P1, P2, P3, P4, P5, P6, P7]]:
    """Signature if exactly seven types are passed in for the `types` parameter"""


@overload
def stream_response(
    stream: Stream,
    types: Tuple[
        Type[P1],
        Type[P2],
        Type[P3],
        Type[P4],
        Type[P5],
        Type[P6],
        Type[P7],
        Type[P8]
    ]
) -> Iterator[Union[P1, P2, P3, P4, P5, P6, P7, P8]]:
    """Signature if exactly eight types are passed in for the `types` parameter"""


@overload
def stream_response(
    stream: Stream,
    types: Tuple[
        Type[P1],
        Type[P2],
        Type[P3],
        Type[P4],
        Type[P5],
        Type[P6],
        Type[P7],
        Type[P8],
        Type[P9]
    ]
) -> Iterator[Union[P1, P2, P3, P4, P5, P6, P7, P8, P9]]:
    """Signature if exactly nine types are passed in for the `types` parameter"""


@overload
def stream_response(
    stream: Stream,
    types: Tuple[
        Type[P1],
        Type[P2],
        Type[P3],
        Type[P4],
        Type[P5],
        Type[P6],
        Type[P7],
        Type[P8],
        Type[P9],
        Type[P10]
    ]
) -> Iterator[Union[P1, P2, P3, P4, P5, P6, P7, P8, P9, P10]]:
    """Signature if exactly ten types are passed in for the `types` parameter"""


# We have to be more generic in our type-hinting for the concrete implementation
# Otherwise, MyPy struggles to figure out that it's a valid argument to `isinstance`
def stream_response(
    stream: Stream,
    types: Union[type, Tuple[type, ...]]
) -> Iterator[Packet]:
    """Yield the packets read from `stream` that are instances of `types`.

    Args:
        stream: Zero-argument callable returning the next packet, or None
            when there is nothing more to read.
        types: A class, or a tuple of classes, passed straight to
            `isinstance` to select which packets are yielded.

    Yields:
        Each matching packet, in the order `stream` produced it. Iteration
        ends at the first `Done` packet or when `stream` returns None.
    """
    # Compare against None explicitly so that only the end-of-stream value
    # stops the loop, never a packet that merely happens to be falsy.
    while (response := stream()) is not None:
        if isinstance(response, Done):
            return
        if isinstance(response, types):
            yield response
def print_messages(stream: Stream) -> None:
    """Print the `msg` field of every `Message` packet read from `stream`."""
    for m in stream_response(stream, Message):
        print(m.msg)
# Demo input: one message, one exception, then the terminating `Done` packet.
msgs = iter((Message(0, "hello"), Exn("Oops", (1, 42)), Done()))
# The `None` default keeps the callable within the `Stream` contract
# (packet or None) instead of raising StopIteration once `msgs` is exhausted.
print_messages(lambda: next(msgs, None))

减少冗长的策略

如果您想使它更简洁,一种方法是为某些类型构造引入别名。这样做的风险是类型提示的意图和含义会变得更难读懂,但它确实让第7–10个重载看起来不那么可怕:

from dataclasses import dataclass
from typing import (
Callable,
Tuple,
Union,
Iterator,
overload,
TypeVar,
Type, 
Sequence
)
@dataclass
class Packet:
    """Common base class of everything a `Stream` can return."""


# One type variable per position of the `types` tuple accepted by the
# `stream_response` overloads; each one is bounded by `Packet`.
P1 = TypeVar('P1', bound=Packet)
P2 = TypeVar('P2', bound=Packet)
P3 = TypeVar('P3', bound=Packet)
P4 = TypeVar('P4', bound=Packet)
P5 = TypeVar('P5', bound=Packet)
P6 = TypeVar('P6', bound=Packet)
P7 = TypeVar('P7', bound=Packet)
P8 = TypeVar('P8', bound=Packet)
P9 = TypeVar('P9', bound=Packet)
P10 = TypeVar('P10', bound=Packet)

# Generic aliases that shorten the longest overload signatures:
# `S[X]` stands for `Type[X]`, and `Tn` is an n-tuple of such class objects.
_P = TypeVar('_P', bound=Packet)
S = Type[_P]
T7 = Tuple[S[P1], S[P2], S[P3], S[P4], S[P5], S[P6], S[P7]]
T8 = Tuple[S[P1], S[P2], S[P3], S[P4], S[P5], S[P6], S[P7], S[P8]]
T9 = Tuple[S[P1], S[P2], S[P3], S[P4], S[P5], S[P6], S[P7], S[P8], S[P9]]
T10 = Tuple[S[P1], S[P2], S[P3], S[P4], S[P5], S[P6], S[P7], S[P8], S[P9], S[P10]]


@dataclass
class Done(Packet):
    """Packet that makes `stream_response` stop iterating."""


@dataclass
class Exn(Packet):
    """Packet carrying an exception text and a pair of integers."""

    exn: str
    # NOTE(review): presumably a source position; meaning of the pair not shown here.
    loc: Tuple[int, int]


@dataclass
class Message(Packet):
    """Packet carrying an integer reference and a message text."""

    ref: int
    msg: str


# A stream is a zero-argument callable returning the next packet, or None.
Stream = Callable[[], Union[Packet, None]]
@overload
def stream_response(stream: Stream, types: Type[P1]) -> Iterator[P1]:
    """Signature if exactly one type is passed in for the `types` parameter"""


@overload
def stream_response(
    stream: Stream,
    types: Tuple[Type[P1], Type[P2]]
) -> Iterator[Union[P1, P2]]:
    """Signature if exactly two types are passed in for the `types` parameter"""


@overload
def stream_response(
    stream: Stream,
    types: Tuple[Type[P1], Type[P2], Type[P3]]
) -> Iterator[Union[P1, P2, P3]]:
    """Signature if exactly three types are passed in for the `types` parameter"""


@overload
def stream_response(
    stream: Stream,
    types: Tuple[Type[P1], Type[P2], Type[P3], Type[P4]]
) -> Iterator[Union[P1, P2, P3, P4]]:
    """Signature if exactly four types are passed in for the `types` parameter"""


@overload
def stream_response(
    stream: Stream,
    types: Tuple[Type[P1], Type[P2], Type[P3], Type[P4], Type[P5]]
) -> Iterator[Union[P1, P2, P3, P4, P5]]:
    """Signature if exactly five types are passed in for the `types` parameter"""


@overload
def stream_response(
    stream: Stream,
    types: Tuple[Type[P1], Type[P2], Type[P3], Type[P4], Type[P5], Type[P6]]
) -> Iterator[Union[P1, P2, P3, P4, P5, P6]]:
    """Signature if exactly six types are passed in for the `types` parameter"""


@overload
def stream_response(
    stream: Stream,
    types: T7[P1, P2, P3, P4, P5, P6, P7]
) -> Iterator[Union[P1, P2, P3, P4, P5, P6, P7]]:
    """Signature if exactly seven types are passed in for the `types` parameter"""


@overload
def stream_response(
    stream: Stream,
    types: T8[P1, P2, P3, P4, P5, P6, P7, P8]
) -> Iterator[Union[P1, P2, P3, P4, P5, P6, P7, P8]]:
    """Signature if exactly eight types are passed in for the `types` parameter"""


@overload
def stream_response(
    stream: Stream,
    types: T9[P1, P2, P3, P4, P5, P6, P7, P8, P9]
) -> Iterator[Union[P1, P2, P3, P4, P5, P6, P7, P8, P9]]:
    """Signature if exactly nine types are passed in for the `types` parameter"""


@overload
def stream_response(
    stream: Stream,
    types: T10[P1, P2, P3, P4, P5, P6, P7, P8, P9, P10]
) -> Iterator[Union[P1, P2, P3, P4, P5, P6, P7, P8, P9, P10]]:
    """Signature if exactly ten types are passed in for the `types` parameter"""


# We have to be more generic in our type-hinting for the concrete implementation
# Otherwise, MyPy struggles to figure out that it's a valid argument to `isinstance`
def stream_response(
    stream: Stream,
    types: Union[type, Tuple[type, ...]]
) -> Iterator[Packet]:
    """Yield the packets read from `stream` that are instances of `types`.

    Args:
        stream: Zero-argument callable returning the next packet, or None
            when there is nothing more to read.
        types: A class, or a tuple of classes, passed straight to
            `isinstance` to select which packets are yielded.

    Yields:
        Each matching packet, in the order `stream` produced it. Iteration
        ends at the first `Done` packet or when `stream` returns None.
    """
    # Compare against None explicitly so that only the end-of-stream value
    # stops the loop, never a packet that merely happens to be falsy.
    while (response := stream()) is not None:
        if isinstance(response, Done):
            return
        if isinstance(response, types):
            yield response
def print_messages(stream: Stream) -> None:
    """Print the `msg` field of every `Message` packet read from `stream`."""
    for m in stream_response(stream, Message):
        print(m.msg)
# Demo input: one message, one exception, then the terminating `Done` packet.
msgs = iter((Message(0, "hello"), Exn("Oops", (1, 42)), Done()))
# The `None` default keeps the callable within the `Stream` contract
# (packet or None) instead of raising StopIteration once `msgs` is exhausted.
print_messages(lambda: next(msgs, None))

最新更新