从函数中调用 PyQt5 渲染时出现问题



我有一个 PyQt5 爬虫(scraper),它必须在抓取网页之前先渲染网页,因为网页包含动态数据。下面是脚本的最简版本,遗憾的是,它仍然由好几个部分组成。

之所以需要从函数中调用渲染,唯一的原因是它有时会无限期卡死,因此需要借助多线程来实现超时。这部分本身工作正常,只是渲染在函数内部无法正常工作,因为 QApplication 由于某种原因没有被正确传入。我可以定义 App = QApplication(sys.argv),并将 Render 类放在 ScrapeClockwise 函数中,但这也需要在该函数中定义 App(由于某种原因,它无法作为参数传入)。然后,如果函数超时,它会在没有关闭 QApplication 的情况下被强行中断,所以下次函数运行时,程序就会直接崩溃。即使把它定义在 try-except 语句中,这种情况也仍然会发生,这非常奇怪。

正如你所看到的,这里有很多奇怪的互动,如果有人能揭示其中的任何一个,我将不胜感激,我已经为此头疼了一段时间了。

import sys
from PyQt5.QtCore import *
from PyQt5.QtWebKitWidgets import *
from PyQt5.QtWidgets import *
from bs4 import BeautifulSoup
import threading
import functools
from threading import Thread
def timeout(timeout):
    """Build a decorator that bounds how long a call may block its caller.

    The decorated function is executed in a daemon thread and the caller
    waits for it for at most ``timeout`` seconds.

    Args:
        timeout: Maximum number of seconds to wait, passed to ``Thread.join``.

    Returns:
        A decorator. The wrapped callable returns whatever the original
        returns and re-raises any ``Exception`` the original raises.

    Raises:
        Exception: From the wrapped callable, with a "timeout ... exceeded"
            message, when no result is available once the wait is over.

    Note:
        The worker thread is not stopped on timeout; it is only abandoned.
        It is marked as a daemon so it does not keep the interpreter alive.
    """
    def deco(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # Separate slots for "returned" and "raised" so that a function
            # which legitimately *returns* an exception object is not
            # mistaken for one that raised it.
            outcome = {}

            def new_func():
                try:
                    outcome['value'] = func(*args, **kwargs)
                except Exception as e:
                    outcome['error'] = e

            t = Thread(target=new_func)
            t.daemon = True
            try:
                t.start()
                t.join(timeout)
            except Exception:
                print('error starting thread')
                raise

            if 'error' in outcome:
                raise outcome['error']
            if 'value' in outcome:
                return outcome['value']
            # Neither slot was filled: the worker is still running (or died
            # with a non-Exception BaseException), so report a timeout.
            raise Exception(
                'function [%s] timeout [%s seconds] exceeded!'
                % (func.__name__, timeout)
            )
        return wrapper
    return deco
# Module-level QApplication; SomeClass and Render below reference it as a global.
APP = QApplication(sys.argv)
class SomeClass(QWidget):
    """Widget with a helper that processes events on the shared ``APP``."""

    def some_method(self):
        """Call ``APP.processEvents`` with the three flags combined below."""
        flags = (
            QEventLoop.ExcludeUserInputEvents
            | QEventLoop.ExcludeSocketNotifiers
            | QEventLoop.WaitForMoreEvents
        )
        APP.processEvents(flags)
class Render(QWebPage):
    """Web page that starts loading ``url`` and runs ``APP``'s event loop
    from its constructor.

    ``frame`` is assigned when the ``loadFinished`` signal fires.
    """

    def __init__(self, url):
        """Connect the finish handler, start the load, then enter the event loop.

        Args:
            url: Address to load, wrapped in a ``QUrl`` before loading.
        """
        super(Render, self).__init__()
        self.loadFinished.connect(self._loadFinished)
        target = QUrl(url)
        self.mainFrame().load(target)
        # Runs the event loop here; _loadFinished calls APP.quit() to end it.
        APP.exec_()

    def _loadFinished(self, result):
        """Store the main frame on ``self.frame`` and quit the event loop.

        Args:
            result: Value delivered by the ``loadFinished`` signal (unused).
        """
        self.frame = self.mainFrame()
        APP.quit()
def ScrapeClockwise(l):
    """Render one hospital's appointment page and print its ``<h4>`` texts.

    Args:
        l: Hospital id; converted with ``str`` and zero-padded to 4 characters.
    """
    hospital_id = str(l).zfill(4)
    url = "https://www.clockwisemd.com/hospitals/" + hospital_id + "/appointments/new"
    print(url)

    # Constructing Render loads the page; the rendered HTML is read from its frame.
    page = Render(url)
    html = page.frame.toHtml()

    soup = BeautifulSoup(html, 'html.parser')
    for heading in soup.find_all('h4'):
        print(heading.get_text())
# Driver: try hospital ids 0, 1, 2, ... without end, wrapping each scrape in a
# 5-second timeout and printing (rather than propagating) any exception.
l = 0
while True:
    func = timeout(5)(ScrapeClockwise)
    try:
        func(str(l))
    except Exception as exc:  # handle errors here
        print(exc)
    l = l + 1

每种技术都有其局限性,在Qt的情况下,您不能在辅助线程中使用QWebPage。您还必须了解技术是如何工作的,Qt的许多元素需要并使用事件循环,这可以帮助解决问题。在这种情况下,可以使用QTimer来测量经过的时间,如果触发了超时,则加载新页面。

考虑到以上几点,我参考这个问题进行了修改,得到了如下解决方案:

from PyQt5 import QtCore, QtWidgets, QtWebKitWidgets
from bs4 import BeautifulSoup

def create_urls(start=0):
    """Yield appointment-page URLs for consecutive hospital ids, without end.

    Args:
        start: First hospital id to emit. Defaults to 0, which reproduces the
            original sequence; a larger value lets a scrape resume midway.

    Yields:
        URL strings whose hospital id is zero-padded to at least four digits.
    """
    hospital_id = start
    while True:
        yield "https://www.clockwisemd.com/hospitals/{:04d}/appointments/new".format(hospital_id)
        hospital_id += 1

class WebPage(QtWebKitWidgets.QWebPage):
    """Web page that loads URLs one after another from an iterable.

    Each load is guarded by a single-shot 10-second timer. When a load
    finishes, the page's ``<h4>`` texts are printed and the next URL is
    requested; when the timer fires first, the next URL is requested without
    processing. Everything is driven by signals, so a running Qt event loop
    is required.
    """

    def __init__(self):
        super(WebPage, self).__init__()
        self.mainFrame().loadFinished.connect(self.handleLoadFinished)
        self.mainFrame().urlChanged.connect(print)
        # Single-shot watchdog, (re)started by fetchNext for every URL.
        self.timer = QtCore.QTimer(
            singleShot=True, interval=10 * 1000, timeout=self.on_timeout
        )

    def start(self, generator):
        """Begin fetching URLs.

        Args:
            generator: Iterable of URL strings. Any iterable is accepted
                (generator, list, ...); it is consumed lazily.
        """
        # iter() is a no-op for generators and makes plain sequences usable.
        self.generator = iter(generator)
        self.fetchNext()

    def fetchNext(self):
        """Load the next URL and arm the timer, or quit when none are left."""
        try:
            url = next(self.generator)
        except StopIteration:
            # A finite URL source must not let StopIteration escape from a
            # Qt slot. Stop the watchdog and ask the application to quit;
            # the zero-delay timer defers the quit to the event loop, so it
            # also takes effect when the source is empty and the loop has
            # not been entered yet.
            self.timer.stop()
            QtCore.QTimer.singleShot(0, QtCore.QCoreApplication.quit)
            return
        self.mainFrame().load(QtCore.QUrl(url))
        self.timer.start()

    def processCurrentPage(self):
        """Print the current URL and the text of every ``<h4>`` in the page."""
        html = self.mainFrame().toHtml()
        print("[url]: {}".format(self.mainFrame().url().toString()))
        soup = BeautifulSoup(html, "html.parser")
        info = soup.find_all("h4")
        for i in info:
            print(i.get_text())

    def on_timeout(self):
        """Timer slot: report the timeout and move on to the next URL."""
        print("[Timeout]")
        self.fetchNext()

    def handleLoadFinished(self):
        """``loadFinished`` slot: disarm the timer, process the page, continue."""
        if self.timer.isActive():
            # The timer's signals are blocked while it is being stopped.
            self.timer.blockSignals(True)
            self.timer.stop()
            self.timer.blockSignals(False)
        self.processCurrentPage()
        self.fetchNext()

if __name__ == "__main__":
    import sys

    # Build the application first, start the page on the URL generator, then
    # run the event loop and exit with its return code.
    app = QtWidgets.QApplication(sys.argv)
    webpage = WebPage()
    webpage.start(create_urls())
    exit_code = app.exec_()
    sys.exit(exit_code)

最新更新