我对推特 API 和 Tweepy 还很陌生,对速率限制的概念感到困惑。我使用的是流式 API,想在不使用任何过滤器(如话题标签或位置)的情况下收集样本推文。一些资料说,采样流只返回约 1% 的推文,因此不应受到速率限制;另一些资料的说法则相反。我经常遇到错误 420,想知道是否有办法避免它,或者更平稳地处理它?
非常感谢你的帮助
我的代码:
import json
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
from textblob import TextBlob
from elasticsearch import Elasticsearch
from datetime import datetime
from config import *
# Name of the Elasticsearch index that receives the analysed tweets.
indexName = "test_new_fields"

# Twitter API credentials (blank in this listing).
# NOTE(review): these assignments run after ``from config import *``, so they
# replace any same-named values that module may provide -- confirm intended.
consumer_key, consumer_secret = '', ''
access_token, access_token_secret = '', ''

# Shared Elasticsearch client, created with the library's default arguments.
es = Elasticsearch()
class TweetStreamListener(StreamListener):
    """Stream listener that scores each tweet's sentiment and indexes it.

    Every raw message is decoded from JSON, classified as negative /
    neutral / positive from its TextBlob polarity, and written to the
    Elasticsearch index named by ``indexName`` through the module-level
    ``es`` client.
    """

    # Kept for backward compatibility with code that may read the class
    # attribute; ``on_data`` builds its own per-tweet hashtag list.
    hashtags = []

    def on_data(self, data):
        """Handle one raw message from the stream.

        Args:
            data: JSON-encoded message as delivered by the stream.

        Returns:
            True, so the stream keeps running.
        """
        dict_data = json.loads(data)
        print(dict_data)

        # Not every stream message is a tweet (presumably delete / limit
        # notices and similar control messages -- see Twitter's streaming
        # message types). Those have no "text"; skip them instead of
        # raising KeyError, which would tear down the connection.
        if not isinstance(dict_data, dict) or "text" not in dict_data:
            return True

        text = dict_data["text"]
        tweet = TextBlob(text)
        scores = tweet.sentiment
        polarity = scores.polarity

        if polarity < 0:
            sentiment = "negative"
        elif polarity == 0:
            sentiment = "neutral"
        else:
            sentiment = "positive"
        print(str(polarity) + " " + sentiment + " " + text)

        # Always bind a list: a missing or empty "entities"/"hashtags"
        # yields [] rather than leaving the name undefined.
        hashtags = (dict_data.get("entities") or {}).get("hashtags") or []

        # Missing user metadata is indexed as None instead of aborting.
        user = dict_data.get("user") or {}

        # NOTE(review): this clears the index write block before every
        # single document; it looks loop-invariant and could probably be
        # done once at start-up -- confirm before moving it.
        es.indices.put_settings(index=indexName,
                                body={"index.blocks.write": False})
        es.index(index=indexName,
                 doc_type="test-type",
                 body={"author": user.get("screen_name"),
                       "date": dict_data.get("created_at"),
                       "location": user.get("location"),
                       "followers": user.get("followers_count"),
                       "friends": user.get("friends_count"),
                       "time_zone": user.get("time_zone"),
                       "lang": user.get("lang"),
                       "timestamp": dict_data.get("timestamp_ms"),
                       "datetime": datetime.now(),
                       "message": text,
                       "hashtags": hashtags,
                       "polarity": polarity,
                       "subjectivity": scores.subjectivity,
                       "sentiment": sentiment})
        return True

    def on_error(self, error):
        """Report an error status passed in by the stream.

        Args:
            error: Error value supplied by the stream (e.g. an HTTP status
                code such as 420).

        NOTE(review): nothing is returned, as before. Tweepy 3.x is
        understood to keep the stream alive and retry with its own
        back-off unless this method returns False -- confirm against the
        installed version before changing the return value.
        """
        print("error: " + str(error))
if __name__ == '__main__':
    # Local import: only the script entry point needs it.
    import time

    listener = TweetStreamListener()
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)

    # Reconnecting in a tight loop is what gets a client rate limited
    # (HTTP 420), so wait between attempts and double the wait each time
    # the connection drops again quickly.
    min_delay = 5      # seconds before the first reconnect
    max_delay = 320    # upper bound for the back-off, in seconds
    delay = min_delay

    while True:
        started = time.time()
        try:
            stream = Stream(auth, listener)
            stream.sample()
        except KeyError as exc:
            # Still tolerated as before, but no longer silently.
            print("stream interrupted by unexpected message: " + repr(exc))

        # A connection that stayed up for a while was healthy: start the
        # back-off over instead of punishing the next reconnect.
        if time.time() - started > max_delay:
            delay = min_delay

        print("reconnecting in " + str(delay) + " seconds")
        time.sleep(delay)
        delay = min(delay * 2, max_delay)