如何解决"string indices must be integers"(使用 conversation_id 检索推文回复时出错)?



我使用TwitterAPI通过下面提到的示例代码提取使用conversation_id的推文回复:这个想法是根据数千个conversation_id列表提取所有回复,并且我确实有学术证书,所以档案搜索应该不会有问题。

from TwitterAPI import TwitterAPI, TwitterOAuth, TwitterRequestError, TwitterConnectionError, TwitterPager
# NOTE: If conversation is over a week old then it will not get returned.
CONVERSATION_ID = '1369393783482236933'
class TreeNode:
    def __init__(self, data):
        """data is a tweet's json object"""
        self.data = data
        self.children = []
        self.replied_to_tweet = None
        if 'referenced_tweets' in self.data:
            for tweet in self.data['referenced_tweets']: 
                if tweet['type'] == 'replied_to':
                    self.replied_to_tweet = tweet['id']
                    break
    def id(self):
        """a node is identified by its tweet id"""
        return self.data['id']
    def parent(self):
        """the reply-to tweet is the parent of the node"""
        return self.replied_to_tweet
    def find_parent_of(self, node):
        """append a node to the children of it's parent tweet"""
        if node.parent() == self.id():
            self.children.append(node)
            return True
        for child in self.children:
            if child.find_parent_of(node):
                return True
        return False
    def print_tree(self, level):
        """level 0 is the root node, then incremented for subsequent generations"""
        created_at = self.data['created_at']
        username = self.data['author_id']['username']
        text_80chars = self.data['text'][0:80].replace('n', ' ')
        print(f'{level*"_"}{level}: [{created_at}][{username}] {text_80chars}')
        level += 1
        for child in reversed(self.children):
            child.print_tree(level)
try:
    o = TwitterOAuth.read_file()
    api = TwitterAPI(o.consumer_key, o.consumer_secret, auth_type='oAuth2', api_version='2')
    # GET ROOT OF THE CONVERSATION
    r = api.request(f'tweets/:{CONVERSATION_ID}',
        {
            'expansions':'author_id',
            'tweet.fields':'author_id,conversation_id,created_at,referenced_tweets'
        },
        hydrate_type=HydrateType.APPEND)
    for item in r:
        root = TreeNode(item)
        print(f'ROOT {root.id()}')
    # GET ALL REPLIES IN CONVERSATION
    # (RETURNED IN REVERSE CHRONOLOGICAL ORDER)
    pager = TwitterPager(api, 'tweets/search/recent', 
        {
            'query':f'conversation_id:{CONVERSATION_ID}',
            'expansions':'author_id',
            'tweet.fields':'author_id,conversation_id,created_at,referenced_tweets'
        },
        hydrate_type=HydrateType.APPEND)
    # "wait=2" means wait 2 seconds between each request.
    # The rate limit is 450 requests per 15 minutes, or
    # 15*60/450 = 2 seconds. 
    orphans = []
    for item in pager.get_iterator(wait=2):
        node = TreeNode(item)
        print(f'{node.id()} => {node.parent()}', item['author_id']['username'])
        # COLLECT ANY ORPHANS THAT ARE CHILDREN OF THE NEW NODE
        orphans = [orphan for orphan in orphans if not node.find_parent_of(orphan)]
        # IF THE NEW NODE CANNOT BE PLACED IN TREE, ORPHAN IT UNTIL ITS PARENT IS FOUND
        if not root.find_parent_of(node):
            orphans.append(node)
    print('nTREE...')
    root.print_tree(0)
    assert len(orphans) == 0, f'{len(orphans)} orphaned tweets'
except TwitterRequestError as e:
    print(e.status_code)
    for msg in iter(e):
        print(msg)
except TwitterConnectionError as e:
    print(e)
except Exception as e:
    print(e)

如果我对最后两行进行注释,则会详细显示错误。

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
C:UsersANEESB~1AppDataLocalTemp/ipykernel_18696/4104024841.py in <module>
     88 
     89         print('nTREE...')
---> 90         root.print_tree(0)
     91         assert len(orphans) == 0, f'{len(orphans)} orphaned tweets'
     92 
C:UsersANEESB~1AppDataLocalTemp/ipykernel_18696/4104024841.py in print_tree(self, level)
     37                 """level 0 is the root node, then incremented for subsequent generations"""
     38                 created_at = self.data['created_at']
---> 39                 username = self.data['author_id']['username']
     40                 text_80chars = self.data['text'][0:80].replace('n', ' ')
     41                 print(f'{level*"_"}{level}: [{created_at}][{username}] {text_80chars}')
TypeError: string indices must be integers

代码应该可以工作,我不知道是什么原因导致了错误。请帮忙吗?

Self.data如下所示:

{
    'author_id': '3420477195', 
    'conversation_id': '1369393783482236933', 
    'created_at': '2021-03-09T21:04:54.000Z', 
    'text': "Happy one year anniversary to everyone working from home! Do you feel like if you have one more Zoom meeting you’ll rip your hair out? First of all, please don't do that. Second, we're here to save you from Zoom boredom with these new backgrounds!", 
    'id': '1369393783482236933', 
    'author_id_hydrate': {'id': '3420477195', 'name': 'Full Frontal', 'username': 'FullFrontalSamB'}
}

author_id只是一个字符串,关于作者的详细信息在author_id_hydrate中。所以self.data['author_id']['username']应该是self.data['author_id_hydrate']['username']

最新更新