WSoC-VITAP · ItsMeg01 · Aug 26, 2021 · Aug 26, 2021
diff --git a/Tasks/summarizer.py b/Tasks/summarizer.py
@@ -0,0 +1,201 @@
+# IMPORTS
+import math
+import re
+import nltk
+import urllib.request
+from bs4 import BeautifulSoup
+from nltk import sent_tokenize, word_tokenize
+from nltk.corpus import stopwords
+from nltk.stem.porter import PorterStemmer
+
+def generate_text(link):
+    """Fetch ``link`` and return the text of every <p> element, concatenated in page order.
+
+    Raises whatever ``urllib.request.urlopen`` raises when the URL cannot be opened.
+    """
+    # The context manager closes the HTTP response once the page has been parsed.
+    with urllib.request.urlopen(link) as page:
+        soup = BeautifulSoup(page, "lxml")
+    return "".join(paragraph.text for paragraph in soup.find_all('p'))
+
+# Method for creating a frequency matrix for the sentences of the text
+def create_frequency_matrix(sentences):
+    """Return ``{sentence[:15]: {stem: count}}`` for the non-stop-word tokens of each sentence."""
+    frequency_matrix = {}
+    stopWords = set(stopwords.words("english"))
+    ps = PorterStemmer()
+
+    for sent in sentences:
+        freq_table = {}
+        for word in word_tokenize(sent):
+            word = word.lower()
+            # Filter BEFORE stemming: the stop-word list holds unstemmed words, so
+            # stems such as "thi" (this) or "wa" (was) would otherwise slip through.
+            if word in stopWords:
+                continue
+            word = ps.stem(word)
+            freq_table[word] = freq_table.get(word, 0) + 1
+
+        # NOTE: sentences sharing their first 15 characters overwrite each other here;
+        # generate_summary() looks entries up by the same prefix, so the key is kept.
+        frequency_matrix[sent[:15]] = freq_table
+
+    return frequency_matrix
+
+# Method for creating the TF(Term Frequency) matrix
+def create_tf_matrix(freq_matrix):
+    """Turn per-sentence term counts into term frequencies.
+
+    ``freq_matrix`` maps sentence key -> {term: count}. Each count is divided by
+    the total number of counted terms in that sentence (the sum of the counts,
+    not the number of distinct terms), so a sentence's frequencies sum to 1.
+    """
+    tf_matrix = {}
+    for sent, f_table in freq_matrix.items():
+        # For an empty table the comprehension has no items, so no division by zero occurs.
+        total_terms = sum(f_table.values())
+        tf_matrix[sent] = {word: count / total_terms for word, count in f_table.items()}
+    return tf_matrix
+
+
+
+def create_documents_per_words(freq_matrix):
+    """Return ``{word: n}`` where n is the number of sentence tables containing ``word``.
+
+    The per-sentence counts themselves are ignored; only presence matters.
+    """
+    doc_counts = {}
+    for table in freq_matrix.values():
+        for term in table:
+            doc_counts[term] = doc_counts.get(term, 0) + 1
+    return doc_counts
+
+
+# Method for creating the IDF(Inverse Document Frequency) matrix
+def create_idf_matrix(freq_matrix, count_doc_per_words, total_documents):
+    """Build a per-sentence table of inverse document frequencies.
+
+    For every word of every sentence table in ``freq_matrix`` the value is
+    ``log10(total_documents / count_doc_per_words[word])``.
+    """
+    def idf(term):
+        return math.log10(total_documents / float(count_doc_per_words[term]))
+
+    return {key: {term: idf(term) for term in table}
+            for key, table in freq_matrix.items()}
+
+
+# Method for creating a combined TF-IDF matrix
+def create_tf_idf_matrix(tf_matrix, idf_matrix):
+    """Multiply each term frequency by the matching inverse document frequency.
+
+    Both arguments map sentence key -> {word: value}. Values are paired by
+    sentence key and word rather than by dict position, so differently ordered
+    inputs still combine correctly.
+
+    Raises KeyError if a (sentence, word) entry of ``tf_matrix`` is missing
+    from ``idf_matrix``.
+    """
+    return {
+        sent: {word: float(tf * idf_matrix[sent][word]) for word, tf in tf_table.items()}
+        for sent, tf_table in tf_matrix.items()
+    }
+
+
+# Method for scoring the 'importance' of a sentence with reference to the TF-IDF matrix
+def score_sentences(tf_idf_matrix) -> dict:
+    """Score each sentence as the mean TF-IDF value of the words in its table.
+
+    Returns a dict mapping sentence key -> score. A sentence whose table is
+    empty (it holds no words) scores 0 instead of raising
+    ZeroDivisionError.
+    """
+    sentenceValue = {}
+    for sent, f_table in tf_idf_matrix.items():
+        # Guard the division: len(f_table) is 0 for an empty table.
+        sentenceValue[sent] = sum(f_table.values()) / len(f_table) if f_table else 0
+    return sentenceValue
+
+
+# Method for finding the average score of the sentences
+def find_average_score(sentenceValue) -> float:
+    """Return the arithmetic mean of the scores in ``sentenceValue``.
+
+    ``sentenceValue`` maps sentence key -> score. An empty mapping (text with
+    no sentences) yields 0.0 rather than raising ZeroDivisionError.
+    """
+    if not sentenceValue:
+        return 0.0
+    return sum(sentenceValue.values()) / len(sentenceValue)
+
+
+# Method to generate summary from the given sentence scores
+def generate_summary(sentences, sentenceValue, threshold):
+    """Concatenate, each preceded by one space, the sentences scoring at least ``threshold``.
+
+    Scores are looked up by a sentence's first 15 characters; unknown keys are skipped.
+    """
+    def keep(sentence):
+        key = sentence[:15]
+        return key in sentenceValue and sentenceValue[key] >= threshold
+
+    return "".join(" " + sentence for sentence in sentences if keep(sentence))
+
+
+def summarize(text, threshold_factor=1.15):
+    """Return an extractive TF-IDF summary of ``text``.
+
+    Sentences whose score is at least ``threshold_factor`` times the average
+    sentence score are kept, in their original order. Text that contains no
+    sentences yields an empty string.
+    """
+    # 1 Sentence Tokenize
+    sentences = sent_tokenize(text)
+    total_documents = len(sentences)
+    if not sentences:
+        return ''  # nothing to score; avoids averaging an empty score table
+
+    # 2 Create the Frequency matrix of the words in each sentence.
+    freq_matrix = create_frequency_matrix(sentences)
+    # 3 Calculate TermFrequency and generate a matrix
+    tf_matrix = create_tf_matrix(freq_matrix)
+    # 4 creating table for documents per words
+    count_doc_per_words = create_documents_per_words(freq_matrix)
+    # 5 Calculate IDF and generate a matrix
+    idf_matrix = create_idf_matrix(freq_matrix, count_doc_per_words, total_documents)
+    # 6 Calculate TF-IDF and generate a matrix
+    tf_idf_matrix = create_tf_idf_matrix(tf_matrix, idf_matrix)
+    # 7 Important Algorithm: score the sentences
+    sentence_scores = score_sentences(tf_idf_matrix)
+    # 8 Find the threshold
+    threshold = find_average_score(sentence_scores)
+    # 9 Important Algorithm: Generate the summary
+    return generate_summary(sentences, sentence_scores, threshold_factor * threshold)
+
+# Method for cleaning the summary text, i.e. removing extra brackets and numbers
+def clean_text(summary):
+    """Return ``summary`` with bracketed numbers such as [12] and every character other
+    than ASCII letters, digits, '+' and '.' replaced by spaces, whitespace runs collapsed."""
+    # Applied in order; whitespace runs are collapsed both before and after the character filter.
+    passes = (r'\[[0-9]*\]', r'\s+', '[^a-zA-Z0-9+.]', r'\s+')
+    for pattern in passes:
+        summary = re.sub(pattern, ' ', summary)
+    return summary
+
+
+if __name__ == '__main__':
+    # Demo run: summarize the Greek mythology Wikipedia article and print the cleaned text.
+    article = generate_text("https://en.wikipedia.org/wiki/Greek_mythology")
+    print(clean_text(summarize(article)))
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/Tasks/task1/color.html b/Tasks/task1/color.html
@@ -0,0 +1,17 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta http-equiv="X-UA-Compatible" content="IE=edge">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Color Picker</title>
+</head>
+<body>
+    <h2>Color-Picker</h2>
+    <!-- color.js copies the picked value into #colorInput when #colorButton is clicked -->
+    <input type="text"  id="colorInput" aria-label="Selected color value">
+    <input type="color" name="color" id="col" aria-label="Pick a color">
+    <input type="button" value="Change color" id="colorButton">
+    <script src="color.js"></script>
+</body>
+</html>
diff --git a/Tasks/task1/color.js b/Tasks/task1/color.js
@@ -0,0 +1,9 @@
+// manifest.json also injects this file into every page, where the popup button is absent.
+const but = document.getElementById('colorButton');
+if (but) but.addEventListener('click', changeColor);  // getElementById returns null when absent
+// Copy the picked color into the text input and set it as this document's background.
+function changeColor() {
+    const color = document.getElementById('col').value;
+    document.getElementById('colorInput').value = color;
+    document.body.style.background = color;
+}
diff --git a/Tasks/task1/manifest.json b/Tasks/task1/manifest.json
@@ -0,0 +1,15 @@
+{
+    "name": "Color Picker",
+    "description": "add your own color to the background",
+    "version": "1.0",
+    "manifest_version": 2,
+    "browser_action": {
+        "default_popup": "color.html"
+      },
+    "content_scripts":[{
+        "matches": ["<all_urls>"],
+        "js":["color.js"],
+        "run_at":"document_end"
+    }]
+
+    }