我有两个 Pandas DataFrame(以下示例仅作说明)。df1['list_of_keywords'] 和 df2['list_of_words'] 这两列的每个单元格都包含一个单词列表。
# Two empty frames that only declare the column layout used in the question.
df1 = pd.DataFrame(columns=["some_data", "another_data", "list_of_keywords"])
df2 = pd.DataFrame(
    columns=["something", "something_more", "something_else", "list_of_words"]
)
df1:
list_of_keywords
['word1', 'word2', 'word3']
['word7', 'word8', 'word7']
你可以试试:
# Setup
import pandas as pd

# Example frames: each cell of the list columns holds a list of words.
df1 = pd.DataFrame(
    {
        "some_data": ["id0001", "id0002", "id0003"],
        "another_data": [12391, 3233, 3426],
        "list_of_keywords": [
            ["word1", "word2", "word3"],
            ["word7", "word8", "word7"],
            ["word1", "word2", "word4"],
        ],
    }
)
df2 = pd.DataFrame(
    {
        "something": ["id_abcd", "id_eeed", "id_dgef"],
        "something_more": ["ref34322", "ref5555", "ref2963"],
        "something_else": ["some comment", "some comment", "some comment"],
        "list_of_words": [
            ["word5", "word4", "word5", "word4", "word9"],
            ["word5", "word3", "word2", "word4", "word1"],
            ["word1", "word2", "word3", "word4", "word6"],
        ],
    }
)

# Data preparation
# Convert every row to a set once, up front, so the subset test below does
# not rebuild the same sets for each (keywords, words) pair.
keyword_sets = [set(keywords) for keywords in df1["list_of_keywords"]]
word_sets = [set(words) for words in df2["list_of_words"]]

# Find matches
# A df1 row is flagged when all of its keywords occur together in at least
# one single df2 row. Real booleans are stored (not the strings "True" /
# "False") so the column can be used directly as a mask, e.g.
# df1[df1["in_df2"]]. Assigning a plain list is positional, so the result
# does not depend on df1 having a default integer index.
df1["in_df2"] = [
    any(keywords <= words for words in word_sets)
    for keywords in keyword_sets
]
print(df1)
# Outputs
   some_data  another_data       list_of_keywords  in_df2
0     id0001         12391  [word1, word2, word3]    True
1     id0002          3233  [word7, word8, word7]   False
2     id0003          3426  [word1, word2, word4]    True