好吧,所以我正在创建一个从页面下载视频的小脚本,但我无法获得正确的下载速度。我知道download_speed=downloaded_file_size/(current_time-start_time),但我就是做不好。
我做错了什么?
'''
Python Trilulilu Downloader
Support for Video and Audio
Support for online view
Author: sharkyz of rstforums.com
'''
import re
from multiprocessing.pool import ThreadPool as Pool
import pyprind
import requests
import bs4
import time
url = 'http://www.trilulilu.ro/video-film/pitbull-ay-chico-lengua-afuera-1'
class commands(object):
def __init__(self, httpadress):
self.httpadress = httpadress
def main_function(self): # Acess, Find, Rewrite, Download
pool = Pool(2)
page = requests.get(self.httpadress)
soup = bs4.BeautifulSoup(page.text, 'lxml')
locatescript = soup.find(text=re.compile('swfobject.embedSWF'))
keys = re.findall(r'"([^,]*?)":', locatescript)
values = re.findall(r'(?<=:)(?:"(.*?)"|d+)', locatescript)
vovu = dict(zip(keys, values))
video_test = ['http://fs{servers}.trilulilu.ro/stream.php?type=video&'
'source=site&hash={hashs}&username={userids}&key={keys}'
'&format=flv-vp6&sig=&exp='.format(servers=vovu['server'],
hashs=vovu['hash'],
userids=vovu['userid'],
keys=vovu['key']),
'http://fs{servers}.trilulilu.ro/stream.php?type=video&'
'source=site&hash={hashs}&username={userids}&key={keys}'
'&format=mp4-360p&sig=&exp='.format(servers=vovu['server'],
hashs=vovu['hash'],
userids=vovu['userid'],
keys=vovu['key'])]
# Name the file
page_title = soup.title.string # Title of trilulilu page
title_chooser = page_title.split(' - ') # Split the title wherever '-' and create a list with elements
# Search for the right link to download
for link in video_test:
respond = requests.get(link, stream=True)
file_size = int(respond.headers.get('Content-Length', 0))
if file_size > 1048576:
# Check if the link was the mp4 or the flv format and choose name
if 'mp4' in link:
local_name_file = '{} - {}.mp4'.format(title_chooser[0],title_chooser[1])
elif 'flv' in link:
local_name_file = '{} - {}.flv'.format(title_chooser[0],title_chooser[1])
else:
print('Download stopped, not recognizable format!')
print('Downloading now...nFile:{}nSize:{}M'.format(local_name_file, round(file_size / 1000/ 1000, 2)))
# Progress Bar
bar = pyprind.ProgBar(file_size / 1024, monitor=True)
file_downloaded_size = 0
with open(local_name_file, 'wb') as f:
dl = 0
for chunk in respond.iter_content(chunk_size=1024):
if chunk:
dl += len(chunk)
start_time = time.mktime(time.localtime())
f.write(chunk)
end_time = time.mktime(time.localtime())
print(dl / (end_time / start_time))
f.flush()
bar.update()
print()
print(bar)
start = commands(url).main_function()
start
我的结果是:
2048.0
3072.0
4096.0
5120.0
6144.0
7168.0
8192.0
9216.0
10240.0
11264.0
12288.0
13312.0
14336.0
15360.0
16384.0
17408.0
18432.0
19456.0
20480.0
21504.0
22528.0
23552.0
24576.0
25600.0
26624.0
27648.0
28672.0
29696.0
30720.0
31744.0
32768.0
33792.0
34816.0
35840.0
36864.0
37888.0
38912.0
39936.0
40960.0
41984.0
43008.0
44032.0
45056.0
46080.0
47104.0
48128.0
49152.0
50176.0
51200.0
而且还在不断增加!
这似乎是代码的相关部分:
for chunk in respond.iter_content(chunk_size=1024): if chunk: dl += len(chunk) start_time = time.mktime(time.localtime()) f.write(chunk) end_time = time.mktime(time.localtime()) print(dl / (end_time / start_time))
如果是这样的话,您正在计算将数据块写入文件所需的时间,该时间应该或多或少是恒定的。另一方面,dl变量,即下载的总大小,并没有停止增长。您应该将start_time从循环中取出,并在开始时对其进行初始化。