forked from scolpig/Movie_for_you
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathjob09_word_cloud.py
70 lines (48 loc) · 1.51 KB
/
job09_word_cloud.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import pandas as pd
import matplotlib.pyplot as plt
from wordcloud import WordCloud
import collections
from matplotlib import font_manager, rc
import matplotlib as mpl
import numpy as np
from PIL import Image
# TrueType font file used for matplotlib text here and passed to WordCloud
# further down (presumably Malgun Gothic, for Korean glyphs -- confirm).
fontpath = './malgun.ttf'

# Draw negative numbers with an ASCII hyphen rather than the Unicode minus
# sign, which some fonts cannot render.
mpl.rcParams['axes.unicode_minus'] = False

# matplotlib selects fonts by family name, so read the name out of the file
# and make it the default font family.
font_name = font_manager.FontProperties(fname=fontpath).get_name()
mpl.rc('font', family=font_name)
def _word_frequencies(tokens):
    """Return a ``{word: count}`` dict ordered from most to least frequent.

    Args:
        tokens: Iterable of hashable tokens (here, whitespace-split words).

    Returns:
        A plain ``dict`` whose insertion order is descending by count; words
        with equal counts keep their first-seen order.
    """
    # Counter.most_common() with no argument is a stable descending sort on
    # the counts, i.e. exactly sorted(items, key=count, reverse=True).
    return dict(collections.Counter(tokens).most_common())


df = pd.read_csv('./crawling_data/datasets/movie_review_2018_2022.csv')
print(df.head())

# First movie: row 469, column 1 (presumably the concatenated review text
# for one title -- confirm against the CSV schema).
words = df.iloc[469, 1]
print(words)
words = words.split()
print(words)
worddict_1 = _word_frequencies(words)
print(worddict_1)

# Second movie: row 737, same column.
words = df.iloc[737, 1]
words = words.split()
worddict_2 = _word_frequencies(words)
print(worddict_2)
# Both clouds share the same rendering options; only the frequencies differ.
_cloud_options = {
    'background_color': 'white',
    'max_words': 500,
    'font_path': fontpath,
}
wordcloud_img_1 = WordCloud(**_cloud_options).generate_from_frequencies(
    worddict_1)
wordcloud_img_2 = WordCloud(**_cloud_options).generate_from_frequencies(
    worddict_2)

# One figure per cloud, axes hidden; plt.show() then displays both.
for _cloud in (wordcloud_img_1, wordcloud_img_2):
    plt.figure(figsize=(12, 4))
    plt.imshow(_cloud, interpolation='bilinear')
    plt.axis('off')
plt.show()