
  • 本文关键字:形容词 评论 计算 python pandas nltk
  • 更新时间 :
  • 英文 :



import pandas as pd
# Sample reviews used to demonstrate noun / verb-adjective co-occurrence counting.
data = {
    'reviews': [
        'Very professional operation. Room is very clean and comfortable',
        'Daniel is the most amazing host! His place is extremely clean, and he provides everything you could possibly want (comfy bed, guidebooks & maps, mini-fridge, towels, even toiletries). He is extremely friendly and helpful.',
        'The room is very quiet, and well decorated, very clean.',
        'He provides the room with towels, tea, coffee and a wardrobe.',
        'Daniel is a great host. Always recomendable.',
        'My friend and I were very satisfied with our stay in his apartment.',
    ]
}
df = pd.DataFrame(data)

# Nouns of interest.
# NOTE(review): this list was cut off in the source after 'house'; any further
# entries are unknown and must be restored from the original data.
nouns = [
    'place', 'Amsterdam', 'apartment', 'location', 'host', 'stay', 'city',
    'room', 'everything', 'time', 'house',
]

# Verbs and adjectives of interest.
# NOTE(review): this list was cut off in the source after 'comfortable'; any
# further entries are unknown and must be restored from the original data.
verbs_adj = [
    'was', 'is', 'great', 'nice', 'had', 'clean', 'were', 'recommend', 'stay',
    'are', 'good', 'perfect', 'comfortable',
]



{'room': {'is': 1, 'clean': 1, 'comfortable': 1}}


def count_co_occurences(reviews):
    """Count known verbs/adjectives per noun for each review.

    For every review, each entry of the module-level ``nouns`` list that
    occurs as a substring of the lower-cased review is mapped to the counts
    of the space-separated words of that review that appear in the
    module-level ``verbs_adj`` list.

    Args:
        reviews: Iterable of review strings.

    Returns:
        A dict keyed ``"review_1"``, ``"review_2"``, ... (in input order),
        each holding ``{noun: {verb_or_adjective: count}}``.
    """
    from collections import Counter

    allowed = set(verbs_adj)
    occurences_per_review = {}
    for i, review in enumerate(reviews):
        lowered = review.lower()
        # Words are split on single spaces only, so a token such as "clean,"
        # keeps its punctuation and will not match "clean".
        counts = Counter(lowered.split(" "))
        kept = {word: n for word, n in counts.items() if word in allowed}
        # NOTE(review): the noun test is a substring match against the
        # lower-cased review, so capitalised entries in ``nouns`` never match.
        occurences_per_review[f"review_{i+1}"] = {
            # Each noun gets its own copy so entries can be mutated independently.
            noun: dict(kept)
            for noun in nouns
            if noun in lowered
        }
    return occurences_per_review



import pandas as pd
import nltk
from collections import Counter
# Sample reviews used to demonstrate noun / verb-adjective co-occurrence counting.
data = {
    'reviews': [
        'Very professional operation. Room is very clean and comfortable',
        'Daniel is the most amazing host! His place is extremely clean, and he provides everything you could possibly want (comfy bed, guidebooks & maps, mini-fridge, towels, even toiletries). He is extremely friendly and helpful.',
        'The room is very quiet, and well decorated, very clean.',
        'He provides the room with towels, tea, coffee and a wardrobe.',
        'Daniel is a great host. Always recomendable.',
        'My friend and I were very satisfied with our stay in his apartment.',
    ]
}
df = pd.DataFrame(data)

# Nouns of interest.
# NOTE(review): this list was cut off in the source after 'house'; any further
# entries are unknown and must be restored from the original data.
nouns = [
    'place', 'Amsterdam', 'apartment', 'location', 'host', 'stay', 'city',
    'room', 'everything', 'time', 'house',
]

# Verbs and adjectives of interest.
# NOTE(review): this list was cut off in the source after 'comfortable'; any
# further entries are unknown and must be restored from the original data.
verbs_adj = [
    'was', 'is', 'great', 'nice', 'had', 'clean', 'were', 'recommend', 'stay',
    'are', 'good', 'perfect', 'comfortable',
]
def buildict(x):
    """Map each known noun in a review to the counts of known verbs/adjectives.

    The review is tokenised with ``nltk.word_tokenize`` and lower-cased.
    Every token found in the module-level ``nouns`` list becomes a key whose
    value is a dict counting the tokens found in the module-level
    ``verbs_adj`` list.

    Args:
        x: A single review string.

    Returns:
        ``{noun: {verb_or_adjective: count}}``; the inner dict is empty when
        the review contains none of the listed verbs/adjectives.
    """
    tokenslower = [token.lower() for token in nltk.word_tokenize(x)]
    allverbs_adj = Counter(word for word in tokenslower if word in verbs_adj)
    # NOTE(review): tokens are lower-cased before the lookup, so capitalised
    # entries in ``nouns`` never match.
    occurdict = {}
    for noun in tokenslower:
        if noun in nouns:
            # Give each noun its own copy of the counts so entries stay independent.
            occurdict[noun] = dict(allverbs_adj)
    return occurdict
# Build the per-review co-occurrence dict and store it in a new 'words' column.
df['words'] = df['reviews'].apply(buildict)


0   Very professional operation. Room is very clea...   {'room': {'is': 1, 'clean': 1, 'comfortable': 1}}
1   Daniel is the most amazing host! His place is ...   {'host': {'is': 3, 'amazing': 1, 'clean': 1, '...
2   The room is very quiet, and well decorated, ve...   {'room': {'is': 1, 'quiet': 1, 'clean': 1}}
3   He provides the room with towels, tea, coffee ...   {'room': {}}
4   Daniel is a great host. Always recomendable.    {'host': {'is': 1, 'great': 1}}
5   My friend and I were very satisfied with our s...   {'stay': {'were': 1, 'stay': 1}, 'apartment': ...
