我在 Python(2.7 版)中使用 tweepy 获取 Twitter 数据。我已经能够获取历史推文,但在尝试用流式(streaming)代码获取同样的数据时,却遇到了错误 "data must be a byte string"(数据必须是字节字符串)。
我已经导入了所有相关的包,并且事先定义好了 consumer token、consumer secret、access token 和 access secret 等变量。
# Build the OAuth handler from the consumer credentials (defined earlier, not shown in this snippet).
auth = tweepy.OAuthHandler(consumer_token, consumer_secret)
# NOTE(review): `api` is created before the access token is set on `auth`,
# and it is not referenced by the streaming code shown in this snippet.
api = tweepy.API(auth)
# Attach the access token/secret to the same handler that is later passed to the stream.
auth.set_access_token(access_token, access_secret)
class CustomStreamListener(tweepy.StreamListener):
    """Stream listener that prints every received status to stdout.

    Errors and timeouts are reported on stderr and the handlers return
    True. The tweepy code visible in the traceback checks
    ``on_error(...) is False``, so returning True does not take that
    branch.
    """

    def on_status(self, status):
        """Print the author's screen name, creation time and text of *status*."""
        # A single formatted argument gives the same space-separated line as
        # the Python 2 statement `print a, b, c`, and also parses on Python 3.
        print('%s %s %s' % (status.author.screen_name,
                            status.created_at,
                            status.text))

    def on_error(self, status_code):
        """Report the HTTP status code on stderr and return True."""
        # Same text as `print >> sys.stderr, 'Encountered error ...', status_code`.
        sys.stderr.write(
            'Encountered error with status code: %s\n' % status_code)
        return True

    def on_timeout(self):
        """Report a timeout on stderr and return True."""
        sys.stderr.write('Timeout...\n')
        return True
# Create the stream from the OAuth handler and an instance of the custom listener.
streamingAPI = tweepy.streaming.Stream(auth, CustomStreamListener())
# Start filtering on the keyword 'cats'. Per the traceback below, this call runs
# filter -> _start -> _run in the calling thread and is where the TypeError is raised.
streamingAPI.filter(track=['cats'])
错误消息如下:
TypeError Traceback (most recent call last)
<ipython-input-34-262a8f805ba0> in <module>()
39
40 streamingAPI = tweepy.streaming.Stream(auth, CustomStreamListener())
---> 41 streamingAPI.filter(track=['cats'])
C:\Python27\lib\site-packages\tweepy\streaming.pyc in filter(self, follow, track, async, locations, stall_warnings, languages, encoding)
426 self.session.params = {'delimited': 'length'}
427 self.host = 'stream.twitter.com'
--> 428 self._start(async)
429
430 def sitestream(self, follow, stall_warnings=False,
C:\Python27\lib\site-packages\tweepy\streaming.pyc in _start(self, async)
344 self._thread.start()
345 else:
--> 346 self._run()
347
348 def on_closed(self, resp):
C:\Python27\lib\site-packages\tweepy\streaming.pyc in _run(self)
237 stream=True,
238 auth=auth,
--> 239 verify=self.verify)
240 if resp.status_code != 200:
241 if self.listener.on_error(resp.status_code) is False:
C:\Python27\lib\site-packages\requests\sessions.pyc in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
459 }
460 send_kwargs.update(settings)
--> 461 resp = self.send(prep, **send_kwargs)
462
463 return resp
C:\Python27\lib\site-packages\requests\sessions.pyc in send(self, request, **kwargs)
571
572 # Send the request
--> 573 r = adapter.send(request, **kwargs)
574
575 # Total elapsed time of the request (approximately)
C:\Python27\lib\site-packages\requests\adapters.pyc in send(self, request, stream, timeout, verify, cert, proxies)
368 decode_content=False,
369 retries=self.max_retries,
--> 370 timeout=timeout
371 )
372
C:\Python27\lib\site-packages\requests\packages\urllib3\connectionpool.pyc in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, **response_kw)
516 httplib_response = self._make_request(conn, method, url,
517 timeout=timeout,
--> 518 body=body, headers=headers)
519
520 # If we're going to release the connection in ``finally:``, then
C:\Python27\lib\site-packages\requests\packages\urllib3\connectionpool.pyc in _make_request(self, conn, method, url, timeout, **httplib_request_kw)
328 # conn.request() calls httplib.*.request, not the method in
329 # urllib3.request. It also calls makefile (recv) on the socket.
--> 330 conn.request(method, url, **httplib_request_kw)
331
332 # Reset the timeout for the recv() on the socket
C:\Python27\lib\httplib.pyc in request(self, method, url, body, headers)
999 def request(self, method, url, body=None, headers={}):
1000 """Send a complete request to the server."""
-> 1001 self._send_request(method, url, body, headers)
1002
1003 def _set_content_length(self, body):
C:\Python27\lib\httplib.pyc in _send_request(self, method, url, body, headers)
1033 for hdr, value in headers.iteritems():
1034 self.putheader(hdr, value)
-> 1035 self.endheaders(body)
1036
1037 def getresponse(self, buffering=False):
C:\Python27\lib\httplib.pyc in endheaders(self, message_body)
995 else:
996 raise CannotSendHeader()
--> 997 self._send_output(message_body)
998
999 def request(self, method, url, body=None, headers={}):
C:\Python27\lib\httplib.pyc in _send_output(self, message_body)
852 #message_body was not a string (i.e. it is a file) and
853 #we must run the risk of Nagle
--> 854 self.send(message_body)
855
856 def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
C:\Python27\lib\httplib.pyc in send(self, data)
824 datablock = data.read(blocksize)
825 else:
--> 826 self.sock.sendall(data)
827
828 def _output(self, s):
C:\Python27\lib\site-packages\requests\packages\urllib3\contrib\pyopenssl.pyc in sendall(self, data)
218 def sendall(self, data):
219 while len(data):
--> 220 sent = self._send_until_done(data)
221 data = data[sent:]
222
C:\Python27\lib\site-packages\requests\packages\urllib3\contrib\pyopenssl.pyc in _send_until_done(self, data)
208 while True:
209 try:
--> 210 return self.connection.send(data)
211 except OpenSSL.SSL.WantWriteError:
212 _, wlist, _ = select.select([], [self.socket], [],
C:\Python27\lib\site-packages\OpenSSL\SSL.pyc in send(self, buf, flags)
945 buf = buf.tobytes()
946 if not isinstance(buf, bytes):
--> 947 raise TypeError("data must be a byte string")
948
949 result = _lib.SSL_write(self._ssl, buf, len(buf))
TypeError: data must be a byte string
我完全不知所措,希望有人能帮忙。非常感谢。
安装pyOpenSSL 0.15.1
https://pypi.python.org/pypi/pyOpenSSL
这将修复错误。
我也遇到了这个问题,但只是在安装 ndg-httpsclient 之后才出现——我当初安装它,是因为我的 Anaconda 环境一直给出 InsecurePlatformWarning 警告。我卸载了 ndg-httpsclient,重启了 IPython Notebook,然后重新运行流式代码,它就正常工作了。
你后来解决这个问题了吗?我在迁移服务器时也遇到了同样的问题(这段代码既会从命令行运行,也会在其他地方通过管理命令运行)。解决办法看起来有点麻烦,不过你有两个选择。
你可以对 OpenSSL 的调用打猴子补丁(monkey patch),我试过,但只是时灵时不灵。
原因似乎是 tweepy 的新版本(3+)使用了 urllib3,这破坏了部分向后兼容性;我在另一个程序中使用 urllib3 时也遇到过类似的问题。我花了大约 4 个小时试图让这一切在 Debian 上运行起来,而同样的代码在 Ubuntu、CentOS 6 和 Mac OS X 上都能正常运行——真让人沮丧。
首先,尝试安装 requests:
pip install requests
——这似乎解决了我遇到的大约 60% 的问题。
如果这不起作用,就把tweepy降级回2.3.0——我刚刚做了这个,它又起作用了。
pip uninstall tweepy
pip install tweepy==2.3.0
对我来说,这是一个教训:构建新服务器时要使用 pip freeze > requirements.txt。这次因为要装的东西很少,我就手动安装了所有东西,结果白白浪费了半天时间。