我正在尝试把这里给出的解决方案从 PyQt4 移植到 PyQt5,作为练习。
不知为何,获取到的 HTML 代码在中途丢失了。为了弄清楚到底发生了什么,我在几个方法里加了一些 print()。Callable 方法中的 print() 能够正常显示 HTML 代码;但是在 handleLoadFinished 方法中,得到的却是 None,因此 funcA 和 funcB 也就无法正常工作。
我目前使用的代码如下:
import sys, signal
from bs4 import BeautifulSoup
from bs4.dammit import UnicodeDammit
from PyQt5 import QtCore, QtGui
from PyQt5.QtWidgets import QApplication
from PyQt5.QtWebEngineWidgets import QWebEnginePage as QWebPage
class WebPage(QWebPage):
    """Load each (url, func) pair in turn and try to pass the page HTML to func.

    NOTE: this is the question's version and it exhibits the reported problem:
    the value handed to func in handleLoadFinished is None, while the HTML
    only ever shows up inside Callable.
    """

    def __init__(self):
        QWebPage.__init__(self)
        # Call handleLoadFinished each time the loadFinished signal fires.
        self.loadFinished.connect(self.handleLoadFinished)

    def process(self, items):
        """Start processing an iterable of (url, func) pairs."""
        self._items = iter(items)
        self.fetchNext()

    def fetchNext(self):
        """Load the next URL; return False when no items are left, else True."""
        try:
            self._url, self._func = next(self._items)
            self.load(QtCore.QUrl(self._url))
        except StopIteration:
            return False
        return True

    def handleLoadFinished(self):
        """Slot for loadFinished: print debug output and call the current func."""
        # toHtml() hands the HTML to self.Callable; A holds toHtml()'s own
        # return value, which is what the debug print below shows as None.
        A = self.toHtml(self.Callable)
        print('nnnnn')
        print("####################### handleLoadFinished: ", A)
        # Second toHtml() call: again it is the return value, not the HTML
        # received by Callable, that is passed on to self._func.
        self._func(self._url, self.toHtml(self.Callable))
        if not self.fetchNext():
            print('# processing complete')
            #self._exit()

    def Callable(self, html_str):
        """Callback given to toHtml(); store and print the received HTML."""
        self.html = html_str
        print('#################### Callable html:', self.html)

    def _exit(self):
        """Print a message and quit the running QApplication."""
        print("exiting...")
        QApplication.instance().quit()
def funcA(url, html):
    """Print the URL and HTML of a loaded page, then parse the HTML.

    url: address of the page that was loaded.
    html: page markup as handed over by the caller.
    """
    print('# processing:', url)
    print('html:', html)
    soup = BeautifulSoup(html, features="html.parser")
    # do stuff with soup...
def funcB(url, html):
    """Print the URL and HTML of a loaded page, then parse the HTML.

    The markup is first passed through UnicodeDammit and its
    ``unicode_markup`` result is what gets parsed.

    url: address of the page that was loaded.
    html: page markup as handed over by the caller.
    """
    print('# processing:', url)
    print('html:', html)
    # Name the parser explicitly (as funcA does): without it BeautifulSoup
    # picks whichever parser happens to be installed and emits a warning, so
    # the resulting tree could differ from one machine to another.
    soup = BeautifulSoup(UnicodeDammit(html).unicode_markup, "html.parser")
    # do stuff with soup...
# Pages to fetch, each paired with the function that will receive its HTML.
items = [
    ('http://stackoverflow.com', funcA),
    ('http://google.com', funcB),
]

# Restore the default SIGINT action so that Ctrl+C terminates the process
# while the Qt event loop is running.
signal.signal(signal.SIGINT, signal.SIG_DFL)
# The trailing "\n" leaves a blank line between this hint and the page output.
print('Press Ctrl+C to quit\n')
app = QApplication(sys.argv)
webpage = WebPage()
webpage.process(items)
sys.exit(app.exec_())
任何帮助我理解和纠正的建议将不胜感激!
在 QtWebEngine 中,获取 HTML 是异步操作:toHtml() 会立即返回 None,页面的 HTML 稍后才会通过回调函数传回,所以你在 handleLoadFinished 中得到的是 None。正确的做法是把 self._func 作为回调传给 toHtml(),并用 functools.partial() 预先绑定 url 参数:
from functools import partial
import signal
import sys
from bs4 import BeautifulSoup
from bs4.dammit import UnicodeDammit
from PyQt5.QtCore import QUrl
from PyQt5.QtWidgets import QApplication
from PyQt5.QtWebEngineWidgets import QWebEnginePage as QWebPage
class WebPage(QWebPage):
    """Load a sequence of URLs one at a time and pass each page's HTML to a callback.

    Usage: call ``process(items)`` with an iterable of ``(url, func)`` pairs.
    For every page, ``func(url, html)`` is called once the HTML has been
    retrieved. The next URL is requested only after that call returns, so
    pages are handled strictly in order.
    """

    def __init__(self):
        super().__init__()
        # Run handleLoadFinished every time a page load finishes.
        self.loadFinished.connect(self.handleLoadFinished)

    def process(self, items):
        """Start processing ``items``, an iterable of ``(url, func)`` pairs."""
        self._items = iter(items)
        self.fetchNext()

    def fetchNext(self):
        """Start loading the next URL.

        Returns False when there are no more items, True otherwise.
        """
        try:
            self._url, self._func = next(self._items)
        except StopIteration:
            return False
        self.load(QUrl(self._url))
        return True

    def handleLoadFinished(self):
        """Slot for ``loadFinished``: request the HTML of the page just loaded."""
        # toHtml() is asynchronous: it returns immediately and later calls the
        # function it was given with the HTML. Bind the URL now so the user
        # callback only needs that single HTML argument.
        wrapper = partial(self._func, self._url)
        self.toHtml(partial(self._handleHtml, wrapper))

    def _handleHtml(self, callback, html):
        """Deliver ``html`` to ``callback``, then move on to the next item."""
        callback(html)
        # Advance only after the current page's HTML has been delivered, so
        # the next load cannot start while the HTML request is still pending
        # and the completion message really is printed last.
        if not self.fetchNext():
            print("# processing complete")
def funcA(url, html):
    """Report a loaded page on stdout and parse its markup.

    url: address of the page that was loaded.
    html: markup of that page, as delivered by the caller.
    """
    print("# processing:", url)
    print("html:", html)
    # Parsed tree, built with the html.parser backend.
    soup = BeautifulSoup(html, features="html.parser")
def funcB(url, html):
    """Report a loaded page on stdout and parse its markup.

    The markup is first passed through UnicodeDammit and its
    ``unicode_markup`` result is what gets parsed.

    url: address of the page that was loaded.
    html: markup of that page, as delivered by the caller.
    """
    print("# processing:", url)
    print("html:", html)
    # Name the parser explicitly (as funcA does): without it BeautifulSoup
    # picks whichever parser happens to be installed and emits a warning, so
    # the resulting tree could differ from one machine to another.
    soup = BeautifulSoup(UnicodeDammit(html).unicode_markup, "html.parser")
# Pages to fetch, each paired with the function that will receive
# (url, html) once that page's HTML is available.
items = [
    ("http://stackoverflow.com", funcA),
    ("http://google.com", funcB),
]
def main():
    """Create the Qt application, start processing ``items`` and run the event loop.

    Exits the process with the event loop's return code.
    """
    # Restore the default SIGINT action so that Ctrl+C terminates the process
    # while the Qt event loop is running.
    signal.signal(signal.SIGINT, signal.SIG_DFL)
    # The trailing "\n" leaves a blank line between this hint and the page output.
    print("Press Ctrl+C to quit\n")
    app = QApplication(sys.argv)
    webpage = WebPage()
    webpage.process(items)
    sys.exit(app.exec_())


if __name__ == "__main__":
    main()