我需要在Twitter上跟踪许多关键字,并将推文存入MongoDB。我的代码参考了下面这个问题:
《如何从Twitter的流式API中获取推文并将其存储到MongoDB中》
import json
import pymongo
import tweepy
# Twitter API credentials. They are empty strings here -- presumably
# placeholders to be filled in before running (TODO confirm).
consumer_key = ""
consumer_secret = ""
access_key = ""
access_secret = ""
# Build the OAuth handler from the consumer pair, then attach the access
# token pair to it.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_key, access_secret)
# API client created from the handler; it is passed to the stream listener.
api = tweepy.API(auth)
class CustomStreamListener(tweepy.StreamListener):
    """Stream listener that stores every raw stream message in MongoDB.

    Each message delivered to ``on_data`` is decoded from JSON and inserted
    into the ``tweets`` collection of the ``test`` database on the default
    ``pymongo.MongoClient()`` connection.
    """

    def __init__(self, api):
        """Initialise the base listener and open the target database.

        Args:
            api: the ``tweepy.API`` instance to associate with the listener.
        """
        # super() must name *this* class; naming tweepy.StreamListener
        # starts the lookup after it and skips StreamListener.__init__.
        super(CustomStreamListener, self).__init__(api)
        # Keep exactly the object the caller passed in.
        self.api = api
        self.db = pymongo.MongoClient().test

    def on_data(self, tweet):
        """Decode one raw JSON message and insert it into ``tweets``.

        Args:
            tweet: the raw JSON string received from the stream.
        """
        # NOTE(review): Collection.insert is the legacy PyMongo call; newer
        # PyMongo releases expect insert_one -- confirm the installed version.
        self.db.tweets.insert(json.loads(tweet))

    def on_error(self, status_code):
        """Keep the stream running after an HTTP error status."""
        # NOTE(review): Twitter recommends disconnecting on 420 (rate
        # limited); always returning True keeps the stream alive -- confirm
        # this is the intended policy.
        return True  # Don't kill the stream

    def on_timeout(self):
        """Keep the stream running after a timeout."""
        return True  # Don't kill the stream
# Keywords to follow on the streaming API.
to_track = ['keyword1', 'keyword2', 'keyword3']
# Open the stream with the custom listener and start filtering.
sapi = tweepy.streaming.Stream(auth, CustomStreamListener(api))
sapi.filter(track=to_track)
有没有办法知道每条推文是由哪个关键字匹配到的?(而不必对每条推文都做一次grep搜索)
我不确定on_data函数如何工作,但是您可以使用on_status并执行以下类似的操作:
import tweepy
# Twitter API credentials. They are empty strings here -- presumably
# placeholders to be filled in before running (TODO confirm).
consumer_key = ''
consumer_secret = ''
access_key = ''
access_secret = ''
# Build the OAuth handler from the consumer pair, then attach the access
# token pair to it.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_key, access_secret)
# API client created from the handler; it is passed to the stream listener.
api = tweepy.API(auth)
class CustomStreamListener(tweepy.StreamListener):
    """Stream listener that records which tracked keywords a tweet matched.

    Each status whose text contains at least one tracked keyword is stored
    once in the ``tweets`` collection of the ``test`` database, with the
    matching keywords listed under ``matched_keywords``.
    """

    def __init__(self, api=None,
                 keywords=('keyword1', 'keyword2', 'keyword3')):
        """Initialise the base listener, the keyword list and the database.

        Args:
            api: optional ``tweepy.API`` instance for the base listener.
            keywords: keywords to look for in each tweet's text; defaults
                to the three keywords this listener originally hard-coded.
        """
        super(CustomStreamListener, self).__init__(api)
        self.keywords = tuple(keywords)
        # Without this attribute on_status fails on its first insert.
        self.db = pymongo.MongoClient().test

    def on_status(self, status):
        """Store *status* together with the tracked keywords it contains.

        Args:
            status: the status object delivered by the stream; its ``text``
                attribute is searched for the tracked keywords.
        """
        # Compare lower-cased whitespace-separated tokens. Words with
        # punctuation attached (e.g. "keyword1,") are not matched.
        words = set(status.text.lower().split())
        matched = [kw for kw in self.keywords if kw.lower() in words]
        if not matched:
            # Nothing is stored when no keyword is found in the text.
            return
        for kw in matched:
            print("do something with %s" % kw)
        # status.text is plain text, not JSON, so it cannot be passed to
        # json.loads. Use the parsed payload when tweepy exposes it as
        # ``_json``; otherwise store the text alone.
        document = dict(getattr(status, '_json', None)
                        or {'text': status.text})
        document['matched_keywords'] = matched
        # One insert per tweet, however many keywords matched.
        self.db.tweets.insert(document)
# Keywords to follow on the streaming API.
to_track = ['keyword1', 'keyword2', 'keyword3']
# Open the stream with the custom listener and start filtering.
sapi = tweepy.streaming.Stream(auth, CustomStreamListener(api))
sapi.filter(track=to_track)