-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMain.py
100 lines (84 loc) · 3.09 KB
/
Main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import re
import tweepy
from tweepy import OAuthHandler
import nltk
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from rake_nltk import Rake
import json
from gensim.summarization import keywords
# Twitter API credentials. The values below are placeholders.
# NOTE(review): real keys should be supplied from outside version control.
consumer_key = 'XXXXXX'
consumer_secret = 'XXXXXX'
access_token = 'XXXXXX'
access_token_secret = 'XXXXXX'

# Build the module-level tweepy client that get_tweets() queries.
try:
    # create OAuthHandler object
    auth = OAuthHandler(consumer_key, consumer_secret)
    # set access token and secret
    auth.set_access_token(access_token, access_token_secret)
    # create tweepy API object to fetch tweets
    api = tweepy.API(auth)
except Exception:
    # Catch Exception instead of using a bare ``except`` so that
    # KeyboardInterrupt / SystemExit are not swallowed at import time.
    print("Error: Authentication Failed")
    # Keep the name bound so later code can detect the failed setup
    # instead of hitting an unrelated NameError.
    api = None
# Matches, at any position: an @mention, a URL of the form scheme://...,
# or any single character that is not an ASCII letter, digit, space or tab.
# Raw string: the pattern contains regex escapes such as \w and \S.
_TWEET_NOISE_RE = re.compile(
    r"(@[A-Za-z0-9]+)|(\w+://\S+)|([^0-9A-Za-z \t])"
)


def clean_tweet(tweet):
    '''
    Utility function to clean tweet text by removing @mentions, links and
    special characters using a single regex substitution.

    Every match is replaced by a space, then runs of whitespace are
    collapsed so the result is single-space separated with no leading or
    trailing whitespace.

    :param tweet: raw tweet text (str)
    :return: cleaned text containing only ASCII letters, digits and
             single spaces
    '''
    return ' '.join(_TWEET_NOISE_RE.sub(' ', tweet).split())
# def get_tweet_sentiment(tweet):
# '''
# Utility function to classify sentiment of passed tweet
# using textblob's sentiment method
# '''
# # create TextBlob object of passed tweet text
# # analysis = TextBlob(clean_tweet(tweet))
# analysis = TextBlob(tweet)
# # set sentiment
# return analysis.sentiment
# # return analysis.sentiment.polarity
# Lazily created, shared VADER analyzer. Constructing one loads the lexicon,
# so it is built once on first use instead of once per scored sentence.
_vader_analyzer = None


def get_tweet_sentiment(sentence):
    '''
    Utility function to score the sentiment of the passed text with
    NLTK's VADER SentimentIntensityAnalyzer.

    :param sentence: text to score (str)
    :return: the 'compound' entry of the analyzer's polarity scores
    '''
    global _vader_analyzer
    if _vader_analyzer is None:
        _vader_analyzer = SentimentIntensityAnalyzer()
    return _vader_analyzer.polarity_scores(sentence)['compound']
# Module-level containers for positive / negative tweets. Two distinct
# list objects are created; nothing in the visible part of this file
# appends to or reads from them.
pos_tweets, neg_tweets = [], []
def _extract_keywords(text, limit=10):
    '''
    Return up to `limit` gensim keywords for `text`, keeping only words
    longer than three characters and dropping the literal token 'https'.
    '''
    # Nothing to extract from blank text; skip the gensim call entirely.
    if not text.strip():
        return []
    found = keywords(text, words=limit, scores=False, lemmatize=True,
                     split=True)
    return [word for word in found if len(word) > 3 and word != 'https']


def get_tweets(query, c=99):
    '''
    Main function to fetch tweets and parse them.

    Searches Twitter for `query`, scores each tweet's raw text with
    get_tweet_sentiment(), and groups the cleaned text of each tweet by
    score: strictly positive scores go to the positive group, everything
    else (including a score of exactly 0) goes to the negative group.
    Keywords are then extracted from each group.

    NOTE(review): `api.search` and `tweepy.TweepError` are tweepy 3.x
    names - confirm the pinned tweepy version before upgrading.

    :param query: search query passed to the Twitter search API
    :param c: number of tweets to request (passed as `count`)
    :return: JSON string with keys 'avg-sentiment' (mean score, 0.0 when
             no tweets were returned), 'pos-keywords' and 'neg-keywords'
             (lists of str); None if the search raised tweepy.TweepError
    '''
    try:
        # call twitter api to fetch tweets
        fetched_tweets = api.search(q=query, count=c)
    except tweepy.TweepError as e:
        # print error (if any)
        print("Error : " + str(e))
        return None

    pos_texts = []
    neg_texts = []
    total_sentiment = 0
    num = 0
    # parsing tweets one by one
    for tweet in fetched_tweets:
        # Sentiment is scored on the raw text; only the cleaned text is
        # kept for keyword extraction.
        sent = get_tweet_sentiment(tweet.text)
        cleaned = clean_tweet(tweet.text)
        if sent > 0:
            pos_texts.append(cleaned)
        else:
            neg_texts.append(cleaned)
        total_sentiment += sent
        num += 1

    poswords2 = _extract_keywords(" ".join(pos_texts))
    negwords2 = _extract_keywords(" ".join(neg_texts))
    print(poswords2)
    print(negwords2)

    # Guard against an empty result set, which would otherwise divide by 0.
    average_sentiment = total_sentiment / num if num else 0.0
    return json.dumps({'avg-sentiment': average_sentiment,
                       'pos-keywords': poswords2,
                       'neg-keywords': negwords2})