Playwright 错误:coroutine 'Page.inner_html' was never awaited(RuntimeWarning:启用 tracemalloc 以获取对象分配回溯)



我编写了一个 Playwright 异步函数,用于从我们的实验室信息系统中提取实验室结果。然而,尽管我对能看到的每一个请求都加了 await,仍然无法摆脱下面的错误。代码仍然可以成功运行,但这个警告很烦人。在 Python 3.11 下产生的错误:

RuntimeWarning: coroutine 'Page.inner_html' was never awaited   if j >= 10 : 
RuntimeWarning: Enable tracemalloc to get the object allocation traceback

文件内部functions.py:

...
async def scrape_results(page, links_master):
    """Scrape lab results for each link into one combined DataFrame.

    For every relative link, navigate the Playwright ``page`` to the
    TrakCare result view, wait for the page to settle, parse the HTML
    tables with pandas, attach episode/collection metadata, and append
    the rows to a running DataFrame.  Scraping stops after 10 results
    (debug cap kept from the original code).

    Parameters
    ----------
    page : playwright.async_api.Page
        Authenticated async Playwright page.
    links_master : iterable[str]
        Relative URL fragments appended to the TrakCare base URL.
    """
    master_results = pd.DataFrame()
    # enumerate() replaces the hand-rolled counter; the original bumped
    # ``j`` *before* printing, so the first result printed as number 2
    # and only 9 links were processed before hitting the j >= 10 cap.
    for j, link in enumerate(links_master, start=1):
        await page.goto(
            "https://trakcarelabwebview.nhls.ac.za/trakcarelab/csp/system.Home.cls"
            + link
        )
        await page.wait_for_load_state(state="networkidle")
        await page.wait_for_selector(loading_icon, state="hidden")
        # The spinner can reappear while late XHRs render the result
        # grid, so settle briefly and check it a second time.
        await page.wait_for_timeout(500)
        await page.wait_for_selector(loading_icon, state="hidden")
        # Wait up to 3 s for the caption element to be readable.  The
        # original compared the text to "TestItem" and discarded the
        # result, so no assertion is made here either.
        await page.locator(test_item_caption).text_content(timeout=3000)

        # Getting results off the page
        html = await page.content()
        soup = BeautifulSoup(html, "lxml")
        tables = soup.find_all('table')
        dfs = pd.read_html(str(tables))
        df = dfs[1]

        # Reset per link so a stale value from a previous iteration is
        # never reported for the current one.
        word_result = None
        try:
            # FIX: on the async API, Page.inner_html is a coroutine and
            # must be awaited -- the bare call caused the
            # "coroutine 'Page.inner_html' was never awaited" warning.
            # BeautifulSoup objects also have no .text_content() method
            # (that is the lxml API); use .get_text() instead.
            word_result = BeautifulSoup(
                await page.inner_html(
                    "#web_EPVisitTestSet_WordResult_0-ngForm > div > div:nth-child(2)"
                ),
                "lxml",
            ).get_text()
        except Exception:
            # Not every episode carries a free-text ("word") result.
            pass

        episode = await page.locator(
            "#web_EPVisitNumber_List_Banner-row-0-item-Episode"
        ).text_content()
        collectiondate = await page.locator(
            "#web_EPVisitTestSet_Result_0-item-CollectionDate"
        ).text_content()
        collectiontime = await page.locator(
            "#web_EPVisitTestSet_Result_0-item-CollectionTime"
        ).text_content()
        resultdate = await page.locator(
            "#web_EPVisitTestSet_Result_0-item-ResultDate"
        ).text_content()
        resulttime = await page.locator(
            "#web_EPVisitTestSet_Result_0-item-ResultTime"
        ).text_content()
        specimen_info = {
            'episode': episode,
            'collectiondate': collectiondate,
            'collectiontime': collectiontime,
            'resultdate': resultdate,
            'resulttime': resulttime
        }
        cleanup_results(df)
        df = df.assign(
            Episode=specimen_info['episode'],
            Collection_Date=specimen_info["collectiondate"],
            Collection_Time=specimen_info["collectiontime"],
            Result_Date=specimen_info["resultdate"],
            Result_Time=specimen_info["resulttime"],
        )
        master_results = pd.concat([master_results, df], ignore_index=True)
        print("Result number __" + str(j) + "__ extracted")
        if j >= 10:
            # Debug cap: stop after 10 results and show what was gathered.
            print(master_results)
            if word_result is not None:
                print(word_result)
            else:
                print("No word_result found...")
            break
...

我已经尝试在对 Playwright 页面对象的所有调用上都添加 await,但可能还是漏掉了某处。

这是我调用函数的代码:

import asyncio
from playwright.async_api import Playwright, async_playwright, expect
import functions
#The elements to interact with
username_box = "#SSUser_Logon_0-item-USERNAME"
password_box = "#SSUser_Logon_0-item-PASSWORD"
...
_username = "User"
_password = "Password"
_param = "MRN139822539"

async def run(playwright: Playwright, _username, _password, _param) -> None:
    """Launch Chromium, reuse a saved session if possible, and scrape results.

    Parameters
    ----------
    playwright : Playwright
        The active async Playwright driver.
    _username, _password, _param :
        Credentials/query parameter placeholders (currently unused here;
        kept so the call signature stays compatible).
    """
    browser = await playwright.chromium.launch(headless=False, slow_mo=50)
    try:
        context = await browser.new_context(storage_state="state.json")
        print("Storage state loaded...")
    except Exception:
        # FIX: the original only printed here, leaving ``context`` unbound
        # and crashing with NameError on the next line.  Fall back to a
        # fresh context so the login flow can proceed.
        print("Error upon loading storage state - continuing with login...")
        context = await browser.new_context()
    page = await context.new_page()
    await page.goto("https://the-web-site-im-visiting.com")
    all_links = await functions.get_links(page)
    await functions.scrape_results(page, all_links)

    # NOTE(review): the original created a *second* context here and then
    # closed that one instead of the working context, leaking the first.
    # The redundant new_context call is removed.
    await page.wait_for_timeout(2000)
    await context.close()
    await browser.close()

async def main() -> None:
    """Entry coroutine: open async Playwright and hand it to ``run``.

    ``_username``/``_password``/``_param`` are the module-level
    placeholders defined above.
    """
    async with async_playwright() as playwright:
        await run(playwright,  _username, _password, _param)

# Script entry point: start the asyncio event loop and run main().
asyncio.run(main())

问题出在这一行:

word_result = BeautifulSoup(page.inner_html("#web_EPVisitTestSet_WordResult_0-ngForm > div > div:nth-child(2)"), "lxml").text_content()

你需要对 page.inner_html 加上 await(另外注意:BeautifulSoup 对象没有 text_content() 方法,应改用 .get_text()):

word_result = BeautifulSoup(await page.inner_html("#web_EPVisitTestSet_WordResult_0-ngForm > div > div:nth-child(2)"), "lxml").get_text()

相关内容