From 4101347ab24cc7094d205315f30925cb8ff24d12 Mon Sep 17 00:00:00 2001 From: Robin Cohen-Selmon <87440088+RobsOnWaves@users.noreply.github.com> Date: Wed, 17 Jan 2024 16:04:03 +0100 Subject: [PATCH 1/5] adding route for getting unique values in fields --- Code/libs/meps_handler.py | 4 ++ Code/libs/mongo_db_handler.py | 24 ++++++++ Code/public_api.py | 58 +++++++++++++++----- DevOps/apps/stacked_apps/docker-compose.yaml | 7 +++ 4 files changed, 79 insertions(+), 14 deletions(-) diff --git a/Code/libs/meps_handler.py b/Code/libs/meps_handler.py index 16ec723..8bf754d 100644 --- a/Code/libs/meps_handler.py +++ b/Code/libs/meps_handler.py @@ -20,6 +20,10 @@ def __init__(self): self.__messages__ = Messages() self.__max_length__ = 1000 self.__timeout_duration__ = 60 + self.__mep_field_list__ = ["MEP Name", "MEP nationalPoliticalGroup", "MEP politicalGroup", "Title", "Place", + "Meeting With", "Meeting Related to Procedure"] + self.__mep_db_name__ = "MEPS" + self.__collection_name__ = "meps_meetings" class TimeoutException(Exception): pass diff --git a/Code/libs/mongo_db_handler.py b/Code/libs/mongo_db_handler.py index 90d3991..34277be 100644 --- a/Code/libs/mongo_db_handler.py +++ b/Code/libs/mongo_db_handler.py @@ -295,3 +295,27 @@ def from_mongo_to_xlsx_meps(self): except Exception as e: print("Exception in getting meps documents in Mongo" + str(e)) return {"ged_insert_status": "Exception in getting meps documents in Mongo" + str(e)} + + def get_unique_values(self,db_name: str, collection_name: str, fields: list): + + # Connexion à la base de données MongoDB + client = self.__mongo_client__ + db = client[db_name] + collection = db[collection_name] + + # Initialiser un dictionnaire pour stocker les valeurs uniques + # Dictionnaire pour stocker les valeurs dédupliquées pour chaque champ + valeurs_dedupliquees = {} + + # Récupérer les valeurs dédupliquées pour chaque champ + for field in fields: + valeurs = collection.distinct(field) + # Convertir les valeurs en chaînes de caractères si elles ne le sont pas déjà + valeurs_conformes = [str(val) if not isinstance(val, str) else val for val in valeurs] + + valeurs_dedupliquees[field] = valeurs_conformes + + # Fermer la connexion à la base de données + client.close() + + return valeurs_dedupliquees diff --git a/Code/public_api.py b/Code/public_api.py index 2399fea..fef4ff2 100644 --- a/Code/public_api.py +++ b/Code/public_api.py @@ -22,6 +22,7 @@ from pythonjsonlogger import jsonlogger import time + class Roles(str, Enum): admin = "admin" user = "user" @@ -54,7 +55,6 @@ class Roles(str, Enum): print("Error getting SECRET_KEY") print(e) - ALGORITHM = "HS256" ACCESS_TOKEN_EXPIRE_MINUTES = 30 JSON_EXTENSION = ".json" @@ -101,6 +101,7 @@ class UserInDB(User): oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token") + def get_user_id_from_token(token: str): try: payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM]) @@ -108,6 +109,7 @@ def get_user_id_from_token(token: str): except jwt.JWTError: return None + def log_to_json_file(log_data): log_file_path = 'logz.json' try: @@ -260,7 +262,7 @@ async def get_current_active_user(current_user: User = Depends(get_current_user) async def get_request_body(request: Request): body = await request.body() - async def app(scope, receive, send): + async def app(scope, send): async def override_receive(): return {"type": "http.request", "body": body} @@ -284,6 +286,7 @@ def mask_sensitive_data_and_exclude_files(body: str) -> str: return body + @app.middleware("http") async def log_requests(request: Request, call_next): start_time = time.time() @@ -304,22 +307,22 @@ async def log_requests(request: Request, call_next): token = await oauth2_scheme(request) user_id = get_user_id_from_token(token) if token else "anonymous" except Exception as e: - user_id = "error_in_token" + user_id = "error_in_token" + str(e) # Continuer le traitement de la requête response = await call_next(request) process_time = (time.time() - start_time) * 1000 # Logger les informations logger.info('Request info', extra={ - "timestamp": datetime.fromtimestamp(start_time).isoformat(), - "user_id": user_id, - 'request_method': request.method, - 'request_url': str(request.url), - 'response_status': response.status_code, - 'process_time_ms': process_time, + "timestamp": datetime.fromtimestamp(start_time).isoformat(), + "user_id": user_id, + 'request_method': request.method, + 'request_url': str(request.url), + 'response_status': response.status_code, + 'process_time_ms': process_time, 'request_body': body_text, # Autres informations... - }) + }) return response @@ -360,7 +363,7 @@ async def create_user(user_name: str = Form(), current_user: User = Depends(get_current_active_user)): if current_user.role == "admin": return mongo_handler.insert_user(user_name, full_name, email, get_password_hash(password), - current_user.username, role) + current_user.username, role) else: return {'response': 'Access denied'} @@ -498,7 +501,7 @@ async def load_meps_file(file: UploadFile, current_user: User = Depends(get_curr await meps_handler.load_csv_file(upload_file=file, answer=answer) if not answer['success']: - raise HTTPException(status_code=403, detail=messages.denied_entry) + raise HTTPException(status_code=404, detail=messages.nok_string_raw) else: mongo_handler.from_df_to_mongo_meps(df=answer['df'], collection_name="meps_meetings") return {'response': messages.build_ok_action_string(user_name=current_user.username)} @@ -507,17 +510,44 @@ async def load_meps_file(file: UploadFile, current_user: User = Depends(get_curr @app.get("/meps_file", - description="loads a file with the list pression groups meetings of MEPs into the database") + description="loads a file with the list pression groups meetings of MEPs into the database") async def get_meps_file(current_user: User = Depends(get_current_active_user)): if current_user.role in ['admin', 'meps']: mongo_handler.from_mongo_to_xlsx_meps() if mongo_handler.from_mongo_to_xlsx_meps(): return FileResponse('meps_fichier.xlsx') else: - raise HTTPException(status_code=403, detail=messages.denied_entry) + raise HTTPException(status_code=404, detail=messages.nok_string_raw) else: raise HTTPException(status_code=403, detail=messages.denied_entry) + +@app.get("/meps_file_fields_values", + description="gets a file with the list of the values of fields") +async def get_meps_file_selected_fields(current_user: User = Depends(get_current_active_user)): + if current_user.role in ['admin', 'meps']: + db_name = meps_handler.get_mep_db_name() + fields = meps_handler.get_mep_field_list() + collection_name = meps_handler.get_mep_collection_name() + + try: + values = mongo_handler.get_unique_values(db_name=db_name, collection_name=collection_name, fields=fields) + except Exception as e: + print("get_meps_file_selected_fields : " + str(e), flush=True) + raise HTTPException(status_code=404, detail=messages.nok_string_raw) + + if values is not None: + try: + return values + except Exception as e: + print("get_meps_file_selected_fields : " + str(e), flush=True) + raise HTTPException(status_code=404, detail=messages.nok_string_raw) + else: + raise HTTPException(status_code=404, detail=messages.nok_string_raw) + else: + raise HTTPException(status_code=403, detail=messages.denied_entry) + + @app.post("/logout") async def logout(): return {"message": "Disconnected, please log in again"} diff --git a/DevOps/apps/stacked_apps/docker-compose.yaml b/DevOps/apps/stacked_apps/docker-compose.yaml index bce8a4c..9d54207 100644 --- a/DevOps/apps/stacked_apps/docker-compose.yaml +++ b/DevOps/apps/stacked_apps/docker-compose.yaml @@ -15,6 +15,13 @@ services: 'PWD_MONGO': "rootmongopwd" + 'SECRET_KEY': "11088b752484acda51943b487d8657e142e91e085187c110e0967650e7526784" + + 'URL_FRONT': 'http://localhost:8080' + + volumes: + - $PWD/logs_preprod.log:/usr/src/app/app_logs.json + ports: - "5555:8000" From 6e88fb80d631a6c3507347989bde8b6b6566ae1d Mon Sep 17 00:00:00 2001 From: Robin Cohen-Selmon <87440088+RobsOnWaves@users.noreply.github.com> Date: Wed, 17 Jan 2024 16:56:01 +0100 Subject: [PATCH 2/5] adding route for getting unique values in fields --- Code/libs/meps_handler.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Code/libs/meps_handler.py b/Code/libs/meps_handler.py index 8bf754d..36f0f34 100644 --- a/Code/libs/meps_handler.py +++ b/Code/libs/meps_handler.py @@ -56,3 +56,11 @@ async def load_csv_file(self, upload_file: UploadFile, answer: dict = None): # Arrête le timer timer.cancel() + def get_mep_field_list(self): + return self.__mep_field_list__ + + def get_mep_db_name(self): + return self.__mep_db_name__ + + def get_mep_collection_name(self): + return self.__collection_name__ From 5b79cd27c1c95f1b8e6d8c899e4c123aff524774 Mon Sep 17 00:00:00 2001 From: Robin Cohen-Selmon <87440088+RobsOnWaves@users.noreply.github.com> Date: Wed, 17 Jan 2024 19:29:56 +0100 Subject: [PATCH 3/5] adding route for getting filtered values --- Code/libs/mongo_db_handler.py | 23 +++++++++++++++-------- Code/public_api.py | 25 +++++++++++++++++++++---- 2 files changed, 36 insertions(+), 12 deletions(-) diff --git a/Code/libs/mongo_db_handler.py b/Code/libs/mongo_db_handler.py index 34277be..e550032 100644 --- a/Code/libs/mongo_db_handler.py +++ b/Code/libs/mongo_db_handler.py @@ -278,17 +278,22 @@ def from_df_to_mongo_meps(self, collection_name: str, df: pd.DataFrame): print("Exception in pushing meps documents in Mongo" + str(e)) return {"ged_insert_status": "Exception in pushing meps documents in Mongo" + str(e)} - def from_mongo_to_xlsx_meps(self): - db = self.__mongo_client__.MEPS - # Récupération des données - collection = db.meps_meetings # Nom de la collection + def from_mongo_to_xlsx(self, db_name: str, collection_name: str, query: dict): + + client = self.__mongo_client__ + db = client[db_name] + collection = db[collection_name] try: - data = list(collection.find({}, {'_id': False})) + data = list(collection.find(query, {'_id': False})) df = pd.DataFrame(data) - df['Date'] = df['Date'].dt.strftime('%d/%m/%Y') + if 'Date' in df.columns: + df['Date'] = df['Date'].dt.strftime('%d/%m/%Y') + else: + print('No Date column') + # Création d'un fichier Excel - excel_file_path = 'meps_fichier.xlsx' # Spécifiez le chemin et le nom de fichier souhaités + excel_file_path = 'export_file.xlsx' # Spécifiez le chemin et le nom de fichier souhaités df.to_excel(excel_file_path, index=False) return True @@ -296,7 +301,7 @@ def from_mongo_to_xlsx_meps(self): print("Exception in getting meps documents in Mongo" + str(e)) return {"ged_insert_status": "Exception in getting meps documents in Mongo" + str(e)} - def get_unique_values(self,db_name: str, collection_name: str, fields: list): + def get_unique_values(self, db_name: str, collection_name: str, fields: list): # Connexion à la base de données MongoDB client = self.__mongo_client__ @@ -319,3 +324,5 @@ def get_unique_values(self,db_name: str, collection_name: str, fields: list): client.close() return valeurs_dedupliquees + + diff --git a/Code/public_api.py b/Code/public_api.py index fef4ff2..979a304 100644 --- a/Code/public_api.py +++ b/Code/public_api.py @@ -21,6 +21,7 @@ import logging from pythonjsonlogger import jsonlogger import time +from typing import Optional class Roles(str, Enum): @@ -511,11 +512,27 @@ async def load_meps_file(file: UploadFile, current_user: User = Depends(get_curr @app.get("/meps_file", description="loads a file with the list pression groups meetings of MEPs into the database") -async def get_meps_file(current_user: User = Depends(get_current_active_user)): +async def get_meps_file(mep_name: Optional[str] = None, + national_political_group: Optional[str] = None, + political_group: Optional[str] = None, + title: Optional[str] = None, + place: Optional[str] = None, + meeting_with: Optional[str] = None, + current_user: User = Depends(get_current_active_user)): if current_user.role in ['admin', 'meps']: - mongo_handler.from_mongo_to_xlsx_meps() - if mongo_handler.from_mongo_to_xlsx_meps(): - return FileResponse('meps_fichier.xlsx') + db_name = meps_handler.get_mep_db_name() + collection_name = meps_handler.get_mep_collection_name() + wild_card = {"$regex": ".*"} + query = { + 'MEP Name': mep_name if mep_name is not None else wild_card, + 'MEP nationalPoliticalGroup': national_political_group if national_political_group is not None else wild_card, + 'MEP politicalGroup': political_group if political_group is not None else wild_card, + 'Title': title if title is not None else wild_card, + 'Place': place if place is not None else wild_card, + 'Meeting With': meeting_with if meeting_with is not None else wild_card + } + if mongo_handler.from_mongo_to_xlsx(db_name=db_name, collection_name=collection_name, query=query): + return FileResponse('export_file.xlsx') else: raise HTTPException(status_code=404, detail=messages.nok_string_raw) else: From 478bc3bbdb22c5435a5dfac2fe49a2bbb3c2a4d0 Mon Sep 17 00:00:00 2001 From: Robin Cohen-Selmon <87440088+RobsOnWaves@users.noreply.github.com> Date: Sat, 20 Jan 2024 11:39:32 +0100 Subject: [PATCH 4/5] adding security + improving search request --- Code/libs/mongo_db_handler.py | 12 ++++++++---- Code/public_api.py | 18 +++++++++++------- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/Code/libs/mongo_db_handler.py b/Code/libs/mongo_db_handler.py index e550032..91ef1eb 100644 --- a/Code/libs/mongo_db_handler.py +++ b/Code/libs/mongo_db_handler.py @@ -315,13 +315,17 @@ def get_unique_values(self, db_name: str, collection_name: str, fields: list): # Récupérer les valeurs dédupliquées pour chaque champ for field in fields: valeurs = collection.distinct(field) - # Convertir les valeurs en chaînes de caractères si elles ne le sont pas déjà - valeurs_conformes = [str(val) if not isinstance(val, str) else val for val in valeurs] + valeurs_conformes = [] + for val in valeurs: + if not isinstance(val, str): + val = str(val) + # Tronquer la chaîne si elle dépasse 50 caractères et ajouter "..." + if len(val) > 50: + val = val[:50] + "..." + valeurs_conformes.append(val) valeurs_dedupliquees[field] = valeurs_conformes - # Fermer la connexion à la base de données - client.close() return valeurs_dedupliquees diff --git a/Code/public_api.py b/Code/public_api.py index 979a304..43e39b0 100644 --- a/Code/public_api.py +++ b/Code/public_api.py @@ -522,14 +522,18 @@ async def get_meps_file(mep_name: Optional[str] = None, if current_user.role in ['admin', 'meps']: db_name = meps_handler.get_mep_db_name() collection_name = meps_handler.get_mep_collection_name() - wild_card = {"$regex": ".*"} + + def wild_card(word_to_search: str) : + word_to_search = re.escape(word_to_search) + return {"$regex": ".*" + word_to_search + ".*"} + query = { - 'MEP Name': mep_name if mep_name is not None else wild_card, - 'MEP nationalPoliticalGroup': national_political_group if national_political_group is not None else wild_card, - 'MEP politicalGroup': political_group if political_group is not None else wild_card, - 'Title': title if title is not None else wild_card, - 'Place': place if place is not None else wild_card, - 'Meeting With': meeting_with if meeting_with is not None else wild_card + 'MEP Name': wild_card(mep_name) if mep_name is not None else wild_card(''), + 'MEP nationalPoliticalGroup': wild_card(national_political_group) if national_political_group is not None else wild_card(''), + 'MEP politicalGroup': wild_card(political_group) if political_group is not None else wild_card(''), + 'Title': wild_card(title) if title is not None else wild_card(''), + 'Place': wild_card(place) if place is not None else wild_card(''), + 'Meeting With': wild_card(meeting_with) if meeting_with is not None else wild_card('') } if mongo_handler.from_mongo_to_xlsx(db_name=db_name, collection_name=collection_name, query=query): return FileResponse('export_file.xlsx') From d58e4aa3bfb94ef0814a426fe630d13ad2cb2e2b Mon Sep 17 00:00:00 2001 From: Robin Cohen-Selmon <87440088+RobsOnWaves@users.noreply.github.com> Date: Sat, 20 Jan 2024 22:09:49 +0100 Subject: [PATCH 5/5] adding date selection --- Code/public_api.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/Code/public_api.py b/Code/public_api.py index 43e39b0..0ffe7ad 100644 --- a/Code/public_api.py +++ b/Code/public_api.py @@ -518,6 +518,8 @@ async def get_meps_file(mep_name: Optional[str] = None, title: Optional[str] = None, place: Optional[str] = None, meeting_with: Optional[str] = None, + start_date: Optional[datetime] = None, + end_date: Optional[datetime] = None, current_user: User = Depends(get_current_active_user)): if current_user.role in ['admin', 'meps']: db_name = meps_handler.get_mep_db_name() @@ -525,7 +527,7 @@ async def get_meps_file(mep_name: Optional[str] = None, def wild_card(word_to_search: str) : word_to_search = re.escape(word_to_search) - return {"$regex": ".*" + word_to_search + ".*"} + return {"$regex": ".*" + word_to_search + ".*", "$options": "i"} query = { 'MEP Name': wild_card(mep_name) if mep_name is not None else wild_card(''), @@ -535,6 +537,14 @@ def wild_card(word_to_search: str) : 'Place': wild_card(place) if place is not None else wild_card(''), 'Meeting With': wild_card(meeting_with) if meeting_with is not None else wild_card('') } + + if start_date and end_date: + query['Date'] = {"$gte": start_date, "$lte": end_date} + elif start_date: + query['Date'] = {"$gte": start_date} + elif end_date: + query['Date'] = {"$lte": end_date} + if mongo_handler.from_mongo_to_xlsx(db_name=db_name, collection_name=collection_name, query=query): return FileResponse('export_file.xlsx') else: