Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added files #4

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
201 changes: 201 additions & 0 deletions Tasks/summarizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
# IMPORTS
import math
import re
import nltk
import urllib.request
from bs4 import BeautifulSoup
from nltk import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

def generate_text(link):
    """Download a web page and return the text of all its paragraphs.

    Args:
        link: URL of the page to fetch; passed unchanged to
            ``urllib.request.urlopen``.

    Returns:
        The text of every ``<p>`` element found by BeautifulSoup, joined in
        document order with no separator between paragraphs.
    """
    # The context manager closes the HTTP response once parsing is done,
    # instead of leaving the connection open until garbage collection.
    with urllib.request.urlopen(link) as page:
        soup = BeautifulSoup(page, "lxml")

    return "".join(paragraph.text for paragraph in soup.find_all('p'))

# Method for creating a frequency matrix for the sentences of the text
def create_frequency_matrix(sentences):
    """Count the stemmed, non-stop-word tokens of every sentence.

    Args:
        sentences: iterable of sentence strings.

    Returns:
        dict mapping the first 15 characters of each sentence to a dict of
        ``{stemmed token: number of occurrences in that sentence}``.
        Sentences sharing the same 15-character prefix share one key, so the
        later sentence's table replaces the earlier one.
    """
    frequency_matrix = {}
    stop_words = set(stopwords.words("english"))
    stemmer = PorterStemmer()

    for sent in sentences:
        freq_table = {}
        for token in word_tokenize(sent):
            token = token.lower()
            # Filter before stemming: the stop-word list holds unstemmed
            # words, so a stop word whose stem differs from its listed form
            # (e.g. "this" -> "thi") would otherwise be counted.
            if token in stop_words:
                continue

            stem = stemmer.stem(token)
            # Keep the post-stem check too, so words whose stem is itself a
            # stop word are still dropped.
            if stem in stop_words:
                continue

            freq_table[stem] = freq_table.get(stem, 0) + 1

        frequency_matrix[sent[:15]] = freq_table

    return frequency_matrix

# Method for creating the TF(Term Frequency) matrix
def create_tf_matrix(freq_matrix):
    """Turn per-sentence term counts into per-sentence term frequencies.

    Args:
        freq_matrix: dict of ``{sentence key: {term: count}}``.

    Returns:
        dict with the same keys, where each count is divided by the number
        of distinct terms in that sentence's table.
    """
    tf_matrix = {}

    for key, counts in freq_matrix.items():
        # The denominator is the number of distinct terms, not the token total.
        distinct_terms = len(counts)
        tf_matrix[key] = {
            term: occurrences / distinct_terms
            for term, occurrences in counts.items()
        }

    return tf_matrix



def create_documents_per_words(freq_matrix):
    """Count in how many sentence tables each term appears.

    Args:
        freq_matrix: dict of ``{sentence key: {term: count}}``.

    Returns:
        dict of ``{term: number of sentence tables containing the term}``.
    """
    documents_with_term = {}

    for counts in freq_matrix.values():
        # Each table contributes at most 1 per term, whatever its count is.
        for term in counts:
            documents_with_term[term] = documents_with_term.get(term, 0) + 1

    return documents_with_term

# Method for creating the IDF(Inverse Document Frequency) matrix
def create_idf_matrix(freq_matrix, count_doc_per_words, total_documents):
    """Compute the inverse document frequency of every term, per sentence.

    Args:
        freq_matrix: dict of ``{sentence key: {term: count}}``.
        count_doc_per_words: dict of ``{term: number of sentences containing it}``.
        total_documents: total number of sentences.

    Returns:
        dict of ``{sentence key: {term: log10(total_documents / doc count)}}``.
    """

    def inverse_document_frequency(term):
        return math.log10(total_documents / float(count_doc_per_words[term]))

    return {
        key: {term: inverse_document_frequency(term) for term in counts}
        for key, counts in freq_matrix.items()
    }

# Method for creating a combined TF-IDF matrix
def create_tf_idf_matrix(tf_matrix, idf_matrix):
    """Multiply term frequencies by inverse document frequencies.

    Sentences and terms are matched by key, not by position, so the result
    is correct even when the two inputs were built in different orders.

    Args:
        tf_matrix: dict of ``{sentence key: {term: tf value}}``.
        idf_matrix: dict of ``{sentence key: {term: idf value}}``.

    Returns:
        dict of ``{sentence key: {term: tf * idf}}``, in ``tf_matrix`` order.
        A sentence or term that is missing from ``idf_matrix`` is left out.
    """
    tf_idf_matrix = {}

    for sent, tf_table in tf_matrix.items():
        idf_table = idf_matrix.get(sent)
        if idf_table is None:
            # No IDF data for this sentence: nothing to combine.
            continue

        tf_idf_matrix[sent] = {
            word: float(tf_value * idf_table[word])
            for word, tf_value in tf_table.items()
            if word in idf_table
        }

    return tf_idf_matrix


# Method for scoring the 'importance' of a sentence with reference to the TF-IDF matrix
def score_sentences(tf_idf_matrix) -> dict:
    """Score each sentence as the mean TF-IDF value of its terms.

    Args:
        tf_idf_matrix: dict of ``{sentence key: {term: tf-idf value}}``.

    Returns:
        dict of ``{sentence key: mean tf-idf value}``. A sentence whose table
        is empty gets ``0.0`` rather than raising ``ZeroDivisionError``.
    """
    sentence_value = {}

    for sent, term_scores in tf_idf_matrix.items():
        if not term_scores:
            # No scored terms, so there is no mean to compute.
            sentence_value[sent] = 0.0
            continue

        sentence_value[sent] = sum(term_scores.values()) / len(term_scores)

    return sentence_value

# Method for finding the average score of the sentences
def find_average_score(sentenceValue) -> float:
    """Return the arithmetic mean of the sentence scores.

    Args:
        sentenceValue: dict of ``{sentence key: score}``.

    Returns:
        The mean of the dict's values, or ``0.0`` when the dict is empty
        (the plain division would raise ``ZeroDivisionError``).
    """
    if not sentenceValue:
        return 0.0

    return sum(sentenceValue.values()) / len(sentenceValue)


# Method to generate summary from the given sentence scores
def generate_summary(sentences, sentenceValue, threshold):
    """Concatenate the sentences whose score reaches the threshold.

    Args:
        sentences: the sentence strings, in the order they should appear.
        sentenceValue: dict of ``{first 15 characters of a sentence: score}``.
        threshold: minimum score (inclusive) a sentence needs to be kept.

    Returns:
        The kept sentences in their original order, each preceded by a single
        space (so a non-empty summary starts with a space); ``''`` when no
        sentence qualifies.
    """
    selected = []
    for sentence in sentences:
        key = sentence[:15]
        # Sentences without a score are skipped rather than treated as errors.
        if key in sentenceValue and sentenceValue[key] >= threshold:
            selected.append(" " + sentence)

    return "".join(selected)


def summarize(text, threshold_factor=1.15):
    """Build an extractive summary of ``text`` from TF-IDF sentence scores.

    Args:
        text: the text to summarize.
        threshold_factor: multiplier applied to the average sentence score to
            obtain the cut-off. Sentences scoring at or above the cut-off are
            kept. Defaults to 1.15, the value that was previously hard-coded.

    Returns:
        The selected sentences as one string (see ``generate_summary``), or
        ``''`` when ``text`` contains no sentences.
    """
    # 1 Sentence Tokenize
    sentences = sent_tokenize(text)
    if not sentences:
        # Nothing to score; the averaging step cannot handle an empty table.
        return ''
    total_documents = len(sentences)

    # 2 Create the frequency matrix of the words in each sentence
    freq_matrix = create_frequency_matrix(sentences)

    # 3 Calculate term frequencies
    tf_matrix = create_tf_matrix(freq_matrix)

    # 4 Count in how many sentences each word occurs
    count_doc_per_words = create_documents_per_words(freq_matrix)

    # 5 Calculate inverse document frequencies
    idf_matrix = create_idf_matrix(freq_matrix, count_doc_per_words, total_documents)

    # 6 Combine both into the TF-IDF matrix
    tf_idf_matrix = create_tf_idf_matrix(tf_matrix, idf_matrix)

    # 7 Score the sentences
    sentence_scores = score_sentences(tf_idf_matrix)

    # 8 The average score is the base of the selection threshold
    threshold = find_average_score(sentence_scores)

    # 9 Keep the sentences that score at or above the scaled threshold
    return generate_summary(sentences, sentence_scores, threshold_factor * threshold)

# Method for cleaning the summary text, i.e. removing bracketed citation numbers and every character other than letters, digits, '+' and '.'
def clean_text(summary):
    """Strip citation markers and unwanted characters from a summary.

    Args:
        summary: the text to clean.

    Returns:
        The text with bracketed numbers such as ``[12]`` removed, every
        character other than ASCII letters, digits, ``+`` and ``.`` replaced
        by a space, and whitespace runs collapsed to one space.
    """
    # Applied in order; every match is replaced by a single space.
    patterns = (
        r'\[[0-9]*\]',
        r'\s+',
        '[^a-zA-Z0-9+.]',
        r'\s+',
    )

    cleaned = summary
    for pattern in patterns:
        cleaned = re.sub(pattern, ' ', cleaned)

    return cleaned



if __name__ == '__main__':
    # Demo run: fetch a Wikipedia article, summarize it and print the
    # cleaned-up summary.
    article_text = generate_text("https://en.wikipedia.org/wiki/Greek_mythology")
    print(clean_text(summarize(article_text)))












17 changes: 17 additions & 0 deletions Tasks/task1/color.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <!-- Descriptive title instead of the editor's "Document" placeholder -->
    <title>Color Picker</title>
</head>
<body>
    <h2>Color-Picker</h2>
    <!-- Text box that color.js fills with the chosen color value -->
    <input type="text" id="colorInput" aria-label="Selected color value">
    <!-- Native color chooser; color.js reads its value on button click -->
    <input type="color" name="color" id="col" aria-label="Pick a color">
    <input type="button" value="Change color" id="colorButton">

    <!-- Loaded last so the elements above exist when the script looks them up -->
    <script src="color.js"></script>
</body>
</html>
9 changes: 9 additions & 0 deletions Tasks/task1/color.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// Wires the "Change color" button of color.html to changeColor().
const but = document.getElementById('colorButton');

// The extension manifest also injects this file into every web page as a
// content script, where the popup's button does not exist. Without this
// guard, addEventListener would throw a TypeError on a null reference.
if (but) {
    but.addEventListener('click', changeColor);
}

/**
 * Copies the value chosen in the color input into the text box and applies
 * it as the background of the current document's body.
 */
function changeColor() {
    const color = document.getElementById('col').value;
    document.getElementById('colorInput').value = color;
    document.body.style.background = color;
}
15 changes: 15 additions & 0 deletions Tasks/task1/manifest.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"name": "Color Picker",
"description": "add your own color to the background",
"version": "1.0",
"manifest_version": 2,
"browser_action": {
"default_popup": "color.html"
},
"content_scripts":[{
"matches": ["<all_urls>"],
"js":["color.js"],
"run_at":"document_end"
}]

}