-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathserver.py
99 lines (72 loc) · 2.84 KB
/
server.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
from flask import Flask, jsonify, request
from flask_cors import CORS
from engine.core.models.tf_idf.tfidf_engine import TfidfEngine
from engine.core.models.word2vec.word2vec_engine import Word2VecEngine
from engine.core.preprocess.arabic.arabic_preprocessor import ArabicPreprocessor
from engine.core.preprocess.english.preprocessor import TextPreprocessor
from engine.core.preprocess.french.french_preprocessor import FrenchPreprocessor
from engine.core.spell_checker.arabic_spell_checker import ArabicSpellChecker
from engine.core.spell_checker.spell_checker import SpellChecker
from engine.utils.data_factory import DataFactory
from engine.utils.file_handler import FileHandler
from engine.utils.files_paths import WIKIR_DOCUMENTS_PATH, WIKIR_NAME, TYDI_DOCUMENTS_PATH, TYDI_NAME, \
FR_WIKIR_DOCUMENTS_PATH, FR_WIKIR_NAME
# Flask application object; the route decorators below register their handlers on it.
app = Flask(__name__)
# Enable Cross-Origin Resource Sharing on the app. No options are passed, so
# flask_cors' defaults apply.
CORS(app)
@app.route('/choose-dataset', methods=["POST"])
def choose_dataset():
    """Load the requested dataset and make it the active one for later requests.

    Expects a JSON body containing a ``dataset`` key. ``WIKIR_NAME`` and
    ``TYDI_NAME`` select their respective corpora; any other string falls
    back to the French WikIR corpus (this fallback is kept from the original
    behaviour).

    On success the module-level ``corpus``, ``engine``, ``spell_checker`` and
    ``dataset_name`` are replaced and a JSON object with ``status: True`` is
    returned. When the body is missing, is not a JSON object, or ``dataset``
    is not a string, nothing is loaded and a JSON error is returned with
    HTTP 400.
    """
    global corpus
    global engine
    global spell_checker
    global dataset_name

    # silent=True yields None for a missing/invalid JSON body so the failure
    # can be reported in the same JSON shape as a success.
    payload = request.get_json(silent=True)
    dataset = payload.get('dataset') if isinstance(payload, dict) else None
    if not isinstance(dataset, str):
        # Reject early: otherwise the French corpus would be loaded and the
        # message concatenation below would then fail on a non-string value.
        response = {
            "status": False,
            "data": "A JSON body with a string 'dataset' field is required."
        }
        return jsonify(response), 400

    # Build everything into locals first, so that an exception while reading
    # or indexing leaves the previously selected dataset untouched.
    if dataset == WIKIR_NAME:
        new_corpus = FileHandler.read_csv_file(WIKIR_DOCUMENTS_PATH)
        new_engine = Word2VecEngine(new_corpus, WIKIR_NAME)
        new_spell_checker = SpellChecker()
    elif dataset == TYDI_NAME:
        # NOTE(review): 15000 is presumably a cap on the number of documents
        # read -- confirm against FileHandler.
        new_corpus = FileHandler.read_jsonl_file(TYDI_DOCUMENTS_PATH, 15000)
        new_engine = Word2VecEngine(new_corpus, TYDI_NAME)
        new_spell_checker = ArabicSpellChecker()
    else:
        new_corpus = FileHandler.read_csv_file(FR_WIKIR_DOCUMENTS_PATH, 15000)
        new_engine = Word2VecEngine(new_corpus, FR_WIKIR_NAME)
        new_spell_checker = SpellChecker()

    corpus = new_corpus
    engine = new_engine
    spell_checker = new_spell_checker
    dataset_name = dataset

    response = {
        "status": True,
        "data": "Dataset (" + dataset + ") Has Been Uploaded Successfully."
    }
    return jsonify(response)
@app.route('/correct', methods=["POST"])
def correct():
    """Spell-correct a query using the active dataset's spell checker.

    Expects a JSON body containing a string ``query``. Returns a JSON object
    with ``status: True`` and the corrected text under ``query``.

    Returns a JSON error with HTTP 400 when the body or ``query`` is missing
    or not a string, and with HTTP 409 when no dataset has been selected yet
    (the spell checker is only created by the ``/choose-dataset`` route).
    """
    # silent=True yields None for a missing/invalid JSON body instead of
    # aborting, so the error can be reported as JSON.
    payload = request.get_json(silent=True)
    query = payload.get('query') if isinstance(payload, dict) else None
    if not isinstance(query, str):
        response = {
            "status": False,
            "message": "A JSON body with a string 'query' field is required."
        }
        return jsonify(response), 400

    # The global is only bound once a dataset has been chosen; look it up
    # defensively so an early call does not raise NameError.
    checker = globals().get('spell_checker')
    if checker is None:
        response = {
            "status": False,
            "message": "No dataset has been selected yet."
        }
        return jsonify(response), 409

    response = {
        "status": True,
        "query": checker.correct(query)
    }
    return jsonify(response)
@app.route('/search', methods=["POST"])
def index():
    """Search the active dataset for documents similar to the query.

    Expects a JSON body containing a string ``query``. The query is
    preprocessed with the preprocessor matching the active dataset (English
    for ``WIKIR_NAME``, Arabic for ``TYDI_NAME``, French otherwise), scored
    by the active engine, and the retrieved documents are returned under
    ``documents`` together with ``status: True``.

    Returns a JSON error with HTTP 400 when the body or ``query`` is missing
    or not a string, and with HTTP 409 when no dataset has been selected yet
    (the engine is only created by the ``/choose-dataset`` route).
    """
    # silent=True yields None for a missing/invalid JSON body instead of
    # aborting, so the error can be reported as JSON.
    payload = request.get_json(silent=True)
    query = payload.get('query') if isinstance(payload, dict) else None
    if not isinstance(query, str):
        response = {
            "status": False,
            "message": "A JSON body with a string 'query' field is required."
        }
        return jsonify(response), 400

    # These globals are only bound once a dataset has been chosen; look them
    # up defensively so an early call does not raise NameError.
    active_engine = globals().get('engine')
    active_dataset = globals().get('dataset_name')
    if active_engine is None:
        response = {
            "status": False,
            "message": "No dataset has been selected yet."
        }
        return jsonify(response), 409

    if active_dataset == WIKIR_NAME:
        processor = TextPreprocessor()
    elif active_dataset == TYDI_NAME:
        processor = ArabicPreprocessor()
    else:
        processor = FrenchPreprocessor()

    data_factory = DataFactory(processor)
    query = data_factory.create_processed_text(query)
    similarities = active_engine.calculate_similarities(query)
    results = active_engine.retrieve_similar_documents(similarities)

    response = {
        "status": True,
        "documents": results
    }
    return jsonify(response)
# Script entry point: start Flask's built-in server only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    app.run(debug=False)