我挣扎的代码最初是在mainclass中-并且工作得很好!我从书里拿出来,用了几次!我试图改变它,使它运行在自己的类"twitter_data"。但是由于某种原因,函数"handle_twitter_http_error"不像以前那样工作了…它只是不再捕获错误…!我很确定这与新的职业结构有关,到目前为止我对这一点知之甚少!
代码如下:
import twitter
import sys
import time
from urllib2 import URLError
from httplib import BadStatusLine
import json
from functools import partial
from sys import maxint
from six import string_types
from datetime import datetime
from datetime import timedelta
class twitter_data:
def __init__(self):
self.twitter_api = self.oauth_login()
def oauth_login(self):
CONSUMER_KEY = 'KEY'
CONSUMER_SECRET = 'SECRET'
OAUTH_TOKEN = 'TOKEN'
OAUTH_TOKEN_SECRET = 'TOKEN_SECRET'
auth = twitter.oauth.OAuth(OAUTH_TOKEN,OAUTH_TOKEN_SECRET,CONSUMER_KEY,CONSUMER_SECRET)
twitter_api=twitter.Twitter(auth=auth)
return twitter_api
def handle_twitter_http_error(self,e, wait_period=2, sleep_when_rate_limited=True):
if wait_period > 3600:
print >> sys.stderr, 'Too many retries. Quitting'
raise e
if e.e.code == 401:
print >> sys.stderr, 'Encountered 401 Error (Not Authorized)'
return None
if e.e.code == 404:
print >> sys.stderr, 'Encountered 404 Error (Not Found)'
return None
elif e.e.code == (429,420):
print >> sys.stderr, 'Encountered 429/420 Error (Rate Limit Exceeded)',sleep_when_rate_limited
if sleep_when_rate_limited:
print >> sys.stderr, 'Retrying in 15 minutes'
sys.stderr.flush()
time.sleep(60*15+5)
print >>sys.stderr, 'Awake now and retrying'
return 2
else:
raise e
elif e.e.code == (500,502,503,504):
print >> sys.stderr, 'Encountered %i Error. Retrying in %i seconds' %(e.e.code, wait_period)
time.sleep(wait_period)
wait_period *= 1.5
return wait_period
else:
raise e
def make_twitter_request(self,twitter_api_func,max_errors=10,*args, **kw):
wait_period = 2
error_count =0
while True:
try:
return twitter_api_func(*args, **kw)
except twitter.api.TwitterHTTPError, e:
error_count = 0
wait_period = self.handle_twitter_http_error(e,wait_period)
if wait_period is None:
return
except URLError, e:
error_count +=1
print >> sys.stderr, 'URLError encountered. Continuing.'
if error_count > max_errors:
print >> sys.stderr, 'Too many errors...bailing out.'
raise
except BadStatusLine, e:
error_count +=1
print >> sys.stderr, 'BadStatusLine encountered. Continuing.'
if error_count > max_errors:
print >> sys.stderr, 'Too many consecutive errors...bailing out.'
raise
def get_friends_followers_ids(self, screen_name=None, user_id=None, friends_limit=maxint, followers_limit=maxint):
assert (screen_name != None) != (user_id != None),
"Must have screen_name or user_id, but not both"
get_friends_ids = partial(self.make_twitter_request, self.twitter_api.friends.ids,
count=5000)
get_followers_ids = partial(self.make_twitter_request, self.twitter_api.followers.ids,
count=5000)
friends_ids, followers_ids = [], []
for twitter_api_func, limit, ids, label in [
[get_friends_ids, friends_limit, friends_ids, "friends"],
[get_followers_ids, followers_limit, followers_ids, "followers"]
]:
if limit == 0: continue
cursor = -1
while cursor != 0:
if screen_name:
response = twitter_api_func(screen_name=screen_name, cursor=cursor)
else:
response = twitter_api_func(user_id=user_id, cursor=cursor)
if response is not None:
ids += response['ids']
cursor = response['next_cursor']
print >> sys.stderr, 'Fetched {0} total {1} ids for {2}'.format(len(ids),
label, (user_id or screen_name))
if len(ids) >= limit or response is None:
break
return friends_ids[:friends_limit], followers_ids[:followers_limit]
然后我用下面的代码来运行它:
td = twitter_data()
td.get_friends_followers_ids(screen_name = 'Madonna')
这是我得到的错误:
---------------------------------------------------------------------------
TwitterHTTPError Traceback (most recent call last)
<ipython-input-35-420d4a0a4512> in <module>()
----> 1 td.get_friends_followers_ids(screen_name = 'Madonna')
<ipython-input-29-d4275257b38f> in get_friends_followers_ids(self, screen_name, user_id, friends_limit, followers_limit)
174
175 if screen_name:
--> 176 response = twitter_api_func(screen_name=screen_name, cursor=cursor)
177 else:
178 response = twitter_api_func(user_id=user_id, cursor=cursor)
<ipython-input-29-d4275257b38f> in make_twitter_request(self, twitter_api_func, max_errors, *args, **kw)
68 except twitter.api.TwitterHTTPError, e:
69 error_count = 0
---> 70 wait_period = self.handle_twitter_http_error(e,wait_period)
71 if wait_period is None:
72 return
<ipython-input-29-d4275257b38f> in handle_twitter_http_error(self, e, wait_period, sleep_when_rate_limited)
55 return wait_period
56 else:
---> 57 raise e
58
59
TwitterHTTPError: Twitter sent status 429 for URL: 1.1/followers/ids.json using parameters: (count=5000&cursor=-1&oauth_consumer_key=o92R2ryg511yw6yK0zXwHBk4Z&oauth_nonce=8966871855796102181&oauth_signature_method=HMAC-SHA1&oauth_timestamp=1440000524&oauth_token=3013233011-jzjcfNa4D5m08k1TbWHeekl9h7Yt9Mu9U6jEPqv&oauth_version=1.0&screen_name=Madonna&oauth_signature=z8mD3kbclM5UOXTbB2E1VguSxHU%3D)
details: {"errors":[{"message":"Rate limit exceeded","code":88}]}
谢谢大家的帮助!
更新:我做了一个"打印调试",并要求它"打印e.e.code",这就是我得到的:
td.get_friends_followers_ids(screen_name='Madonna')
429
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "testdriver.py", line 113, in get_friends_followers_ids
response = twitter_api_func(screen_name=screen_name, cursor=cursor)
File "testdriver.py", line 70, in make_twitter_request
wait_period = self.handle_twitter_http_error(e,wait_period)
File "testdriver.py", line 58, in handle_twitter_http_error
raise e
twitter.api.TwitterHTTPError: Twitter sent status 429 for URL: 1.1/followers/ids.json using parameters: (count=5000&cursor=1507581380542740144&oauth_consumer_key=o92R2ryg511yw6yK0zXwHBk4Z&oauth_nonce=4075478308692875785&oauth_signature_method=HMAC-SHA1&oauth_timestamp=1440078685&oauth_token=3013233011-jzjcfNa4D5m08k1TbWHeekl9h7Yt9Mu9U6jEPqv&oauth_version=1.0&screen_name=Madonna&oauth_signature=zZRpNFAQGR1IWcVSwK0dAFOJMuk%3D)
details: {"errors":[{"message":"Rate limit exceeded","code":88}]}
所以它确实包含e.e.code的信息,但由于某种原因没有捕获它!!
我终于想通了:谢谢Grysik用
的想法e.e.code in (429,420)
解决方案完全有效!由于