使用 requests 选择并上传 PDF 文件时出现问题



我正试图用 Python 编写一个脚本,通过 HTTP POST 请求在网页中上传这个 PDF 文件。我尝试了如下做法,但遗憾的是,脚本无法上传文件。

这是登录链接。以下是供您使用的用户名 SmthShift_123 和密码 7/B!yzRd8wuK!N2。登录后转到此页面,单击最后一个选项卡 Anhang,您将在其中找到上传选项。

为了便于您直观了解——该页面就是这个样子。

这是我迄今为止的尝试:

import requests
from bs4 import BeautifulSoup
# Asker's original requests-based attempt (reported as NOT uploading the file).
# Only the formatting and file-handle handling are repaired here; the request
# itself is intentionally left as the asker wrote it.
login_url = 'https://jobs.commerzbank.com/index.php?ac=login'
application_link = 'https://jobs.commerzbank.com/index.php?ac=application&jobad_id=30670'
target_link = 'https://jobs.commerzbank.com/index.php?ac=application&page=6'
upload_link = 'https://jobs.commerzbank.com/inc/candidate_attachments.php'

with requests.Session() as s:
    # Send a browser-like User-Agent with every request of this session.
    s.headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36'

    # Load the login page, collect every named <input> (including hidden
    # ones) and post them back together with the credentials.
    res = s.get(login_url)
    sauce = BeautifulSoup(res.text, "lxml")
    elem = {i['name']: i.get('value', '') for i in sauce.select('input[name]')}
    elem['username'] = 'SmthShift_123'
    elem['password'] = '7/B!yzRd8wuK!N2'
    s.post(login_url, data=elem)

    # Open the application, then the attachments page (page=6).
    s.get(application_link)
    resp = s.get(target_link)
    soup = BeautifulSoup(resp.text, "lxml")

    # Start from the page's own named inputs and add the upload fields.
    # NOTE(review): these field names ('form-control', 'upload', 'save',
    # 'searchButton') differ from the ones posted by the working requests
    # answer further down this page ('category', 'application_token',
    # 'action', 'attachment'); presumably that is why nothing is saved.
    payload = {i['name']: i.get('value', '') for i in soup.select('input[name]')}
    payload['form-control'] = 'Anschreiben'
    payload['upload'] = 'Datei hochladen'
    payload['save'] = ''

    # Keep the file open only for the duration of the multipart POST.
    with open('CV.pdf', 'rb') as cv_file:
        files = {
            'searchButton': cv_file,
        }
        s.post(upload_link, files=files, data=payload)

当我执行上面的脚本时,它既不会保存该文件,也不会抛出任何错误。

我也试过下面这种做法(仅在上传这一步使用 selenium),但脚本同样无法选择并上传文件:

# Asker's second attempt: log in with the requests session `s`, then copy its
# cookies into a Chrome instance and try the upload through Selenium.
# This is a fragment; `s`, `elem`, `login_url`, `application_link` and
# `target_link` come from the requests script shown earlier.
s.post(login_url, data=elem)
s.get(application_link)
resp = s.get(target_link)

driver = webdriver.Chrome()
# Load the target page once, clear the browser's cookies, then replace them
# with the cookies of the logged-in requests session.
driver.get(resp.url)
driver.delete_all_cookies()
for cookie_name, cookie_value in s.cookies.items():
    driver.add_cookie({"name": cookie_name, "value": cookie_value})
# Reload so the page is requested with the copied cookies.
driver.get(resp.url)

# Choose the attachment category and type the file path into the upload input.
select = Select(WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "select#upload_category"))))
select.select_by_visible_text("Lebenslauf")
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "input#upload_file"))).send_keys("C://Users/WCS/Desktop/CV.pdf")

如何使用 requests 选择并上传 PDF 文件?

我可以使用selenium上传它。这个网站很棘手。它有一个隐藏的input,只有在悬停要上传的按钮时才会出现。

试试这个:

from selenium import webdriver
from time import sleep
from selenium.webdriver.common.by import By
from selenium.webdriver.support.select import Select
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains

# Selenium-only flow: log in via the quick-login popover, open the attachments
# tab, choose a category, hover the upload control and feed the file path to
# the hidden file input before pressing the upload button.
login_url = 'https://jobs.commerzbank.com/index.php?ac=login'

driver = webdriver.Chrome()
driver.implicitly_wait(5)
driver.maximize_window()
driver.get(login_url)

# Open the login popover and fill in the credentials.
login_nav = driver.find_element(By.ID, 'nav_login')
login_nav.click()
username_box = driver.find_element(
    By.CSS_SELECTOR, 'div.popover-content #quick-login-username')
username_box.send_keys('SmthShift_123')
password_box = driver.find_element(
    By.CSS_SELECTOR, 'div.popover-content #quick-login-password')
password_box.send_keys('7/B!yzRd8wuK!N2')
submit_button = driver.find_element(
    By.CSS_SELECTOR,
    "div.popover-content #quick_login_form button[type='submit']")
submit_button.click()

# Go to the application and switch to the tab with page number 6.
driver.get('https://jobs.commerzbank.com/index.php?ac=application&jobad_id=30670')
attachments_tab = driver.find_element(
    By.CSS_SELECTOR, ".applicationform-tab[data-pagenumber='6']")
attachments_tab.click()

# Wait for the category drop-down and pick the entry by its visible label.
category_element = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, "select#upload_category")))
category_select = Select(category_element)
category_select.select_by_visible_text("Lebenslauf")

# Wait for the visible upload control, then look it up for hovering.
WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, "input#upload_file")))
upload_control = driver.find_element(By.CSS_SELECTOR, "input#upload_file")

# Hover just inside the control and give the page time to react.
ActionChains(driver).move_to_element_with_offset(upload_control, 5, 5).perform()
sleep(3)

# The real file input is hidden; send the local file path to it.
file_input = driver.find_element(By.CSS_SELECTOR, "input[name='attachment']")
file_input.send_keys("/<path>/CV.pdf")

# Finally trigger the upload.
start_button = driver.find_element(By.CSS_SELECTOR, "input#start_file_upload_button")
start_button.click()

希望,这对你也有用。祝你好运

解决方案1

该网站的 JS 文件中有一个 attachFfwAjaxUpload() 函数,用于上传附件文件。

BeautifulSoup 的 find(name, attrs, recursive, text, **kwargs) 方法:返回第一个匹配的对象。

示例

import requests
from bs4 import BeautifulSoup
# Upload CV.pdf with requests only: log in, open the attachments page, read
# the application token from the attachment form and post the file to the
# upload endpoint.
login_url = 'https://jobs.commerzbank.com/index.php?ac=login'
application_link = 'https://jobs.commerzbank.com/index.php?ac=application&jobad_id=30670'
target_link = 'https://jobs.commerzbank.com/index.php?ac=application&page=6'
upload_link = 'https://jobs.commerzbank.com/inc/candidate_attachments.php'

with requests.Session() as session:
    # Send a browser-like User-Agent with every request of this session.
    session.headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36'

    # Post the login form back with all of its named inputs plus credentials.
    res = session.get(login_url)
    sauce = BeautifulSoup(res.text, "lxml")
    elem = {i['name']: i.get('value', '') for i in sauce.select('input[name]')}
    elem['username'] = 'SmthShift_123'
    elem['password'] = '7/B!yzRd8wuK!N2'
    session.post(login_url, data=elem)

    session.get(application_link)
    resp = session.get(target_link)
    soup = BeautifulSoup(resp.text, "lxml")

    # Locate the attachment form; fail with a clear message instead of an
    # AttributeError on None when the page does not contain it.
    form = soup.find("form", attrs={'action': 'index.php'})
    if form is None:
        raise RuntimeError(
            "attachment form (action='index.php') not found on the target page")
    token_input = form.find('input', attrs={'name': 'application_token'})
    if token_input is None:
        raise RuntimeError(
            "application_token input not found in the attachment form")

    payload = {
        # Upload category: four options exist, with values 2, 1, 4 and 12;
        # pick one of them.
        'category': '12',
        'application_token': token_input.get('value', ''),
        'action': 'upload',
    }
    # The site's own upload logic is the attachFfwAjaxUpload() function in
    # frontend.min.js (browser "Sources" tab, roughly lines 38878-38903).
    print(payload)

    # The POST must happen while the file is still open, so it stays inside
    # this `with` block.
    with open('CV.pdf', 'rb') as f:
        files = {"attachment": f}
        attachment_response = session.post(upload_link, files=files, data=payload)

    # Show the HTTP status code and body returned by the upload endpoint.
    print(attachment_response.status_code)
    print(attachment_response.text)

解决方案2-通过 Selenium 上传文件

from selenium.webdriver import Chrome
from selenium.webdriver.common.by import By    
from selenium.webdriver.support.select import Select
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
# Selenium flow: log in through the full login form, open the attachments tab,
# choose a category by value and upload the file via the input named
# "attachment".
login_url = 'https://jobs.commerzbank.com/index.php?ac=login'
application_link = 'https://jobs.commerzbank.com/index.php?ac=application&jobad_id=30670'

driver = Chrome()
driver.implicitly_wait(5)
driver.maximize_window()
driver.get(login_url)
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, 'login')))

# Fill in the username.
username = driver.find_element(By.ID, 'login-username')
username.send_keys('SmthShift_123')
# Fill in the password.
password = driver.find_element(By.ID, 'login-password')
password.send_keys('7/B!yzRd8wuK!N2')
# Submit the login form. Uses find_element(By.XPATH, ...) like the rest of the
# script; the find_element_by_* helpers are not available in current
# Selenium 4 releases.
login = driver.find_element(By.XPATH, "//form[@id='login']//button[@type='submit']")
login.click()

# After logging in, open the application and switch to the tab for page 6.
driver.get(application_link)
driver.find_element(By.CSS_SELECTOR, ".applicationform-tab[data-pagenumber='6']").click()

# Wait for the category drop-down and select the option whose value is "12".
select = Select(
    WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, "upload_category"))))
select.select_by_value("12")

# Wait for the visible upload control and hover just inside it; the wait
# already returns the element, so no second lookup is needed.
upload_control = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.ID, "upload_file")))
action = ActionChains(driver)
action.move_to_element_with_offset(upload_control, 5, 5)
action.perform()

# Find the file input by its name and hand it the path of the PDF.
file_input = driver.find_element(By.NAME, "attachment")
file_input.send_keys("/CV.pdf")

# Press the button that starts the upload.
upload_button = driver.find_element(By.ID, "start_file_upload_button")
upload_button.click()

最新更新