How to create a folder to store images scraped from a website



I have written the following code to pull each product's image from a website scrape. I am very new to this and not sure how to stop it creating a new folder for every product. At the moment it creates a new folder called whiteline_images inside the previous folder, which is also called whiteline_images. That is easy to fix by hand with 5 products, but not once I change it to 500+! I know where in the code it is doing this… I just don't know how to fix it. Any help is appreciated!

import requests
from bs4 import BeautifulSoup
import os

def imagedown(url, folder):
    try:
        os.mkdir(os.path.join(os.getcwd(), folder))
    except:
        pass
    os.chdir(os.path.join(os.getcwd(), folder))
    r = requests.get(url)
    soup = BeautifulSoup(r.text, 'html.parser')
    images = soup.findAll('img', {"src": True})
    for index, image in enumerate(images, start=1):
        if image.get('src').startswith('https://imageapi.partsdb.com.au/api/Image'):
            link = image.get('src')
            name = f'{soup.find("div", {"class": "head2BR"}).text} ({index})'
            with open(name + '.jpg', 'wb') as f:
                im = requests.get(link)
                f.write(im.content)
                print('Writing:', name)

imagedown('https://www.whiteline.com.au/product_detail4.php?part_number=KBR15', 'whiteline_images')
imagedown('https://www.whiteline.com.au/product_detail4.php?part_number=W13374', 'whiteline_images')
imagedown('https://www.whiteline.com.au/product_detail4.php?part_number=BMR98', 'whiteline_images')
imagedown('https://www.whiteline.com.au/product_detail4.php?part_number=W51210', 'whiteline_images')
imagedown('https://www.whiteline.com.au/product_detail4.php?part_number=W51211', 'whiteline_images')

The nesting happens because each call runs os.chdir into the folder it just created, so the next call's os.mkdir makes another whiteline_images inside the previous one. Use os.path.join when writing the images to the directory instead of changing directories:

import requests, os
from bs4 import BeautifulSoup

def imagedown(url, folder):
    if not os.path.isdir(folder):  # cleaner to use os.path.isdir when checking for folder existence
        os.mkdir(folder)
    soup = BeautifulSoup(requests.get(url).text, 'html.parser')
    for index, image in enumerate(soup.findAll('img', {"src": True}), start=1):
        if image.get('src').startswith('https://imageapi.partsdb.com.au/api/Image'):
            link = image.get('src')
            name = f'{soup.find("div", {"class": "head2BR"}).text} ({index})'
            with open(os.path.join(folder, name + '.jpg'), 'wb') as f:  # join folder name to new image name
                im = requests.get(link)
                f.write(im.content)
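
If you prefer, the existence check and mkdir can also be collapsed into a single call with os.makedirs and its exist_ok flag. A small sketch of that variation:

import os

folder = 'whiteline_images'
# makedirs with exist_ok=True creates the folder (and any missing parents)
# and silently does nothing if it already exists.
os.makedirs(folder, exist_ok=True)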

Edit: updated solution:

import requests, os
from bs4 import BeautifulSoup

def imagedown(url, folder):
    if not os.path.isdir(folder):  # cleaner to use os.path.isdir when checking for folder existence
        os.mkdir(folder)
    soup = BeautifulSoup(requests.get(url).text, 'html.parser')
    for i, a in enumerate(soup.select('img:is(.mainman, .thumbbot)'), 1):
        name = soup.select_one('div.head2BR').text + f'({i})'
        with open(os.path.join(folder, name + '.jpg'), 'wb') as f:  # join folder name to new image name
            im = requests.get(a['src'])
            f.write(im.content)

imagedown('https://www.whiteline.com.au/product_detail4.php?part_number=KBR15', 'whiteline_images')
imagedown('https://www.whiteline.com.au/product_detail4.php?part_number=W13374', 'whiteline_images')
imagedown('https://www.whiteline.com.au/product_detail4.php?part_number=BMR98', 'whiteline_images')
imagedown('https://www.whiteline.com.au/product_detail4.php?part_number=W51210', 'whiteline_images')
imagedown('https://www.whiteline.com.au/product_detail4.php?part_number=W51211', 'whiteline_images')
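
Since the question mentions scaling this to 500+ products, one way to drive the same function is to loop over a list of part numbers and build each URL from the part_number query parameter. A minimal sketch, assuming the part numbers are available in a list (the list below is only a placeholder; in practice it might be read from a file or another page):

# Placeholder list of part numbers; swap in the real 500+ part numbers.
part_numbers = ['KBR15', 'W13374', 'BMR98', 'W51210', 'W51211']

for part in part_numbers:
    url = f'https://www.whiteline.com.au/product_detail4.php?part_number={part}'
    imagedown(url, 'whiteline_images')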
