refactor(presidio): extract Server into separate file

dettanym · Apr 28, 2024 · 51583a6 · 51583a6
1 parent 521fec3
commit 51583a6
Show file tree

Hide file tree

Showing 2 changed files with 191 additions and 187 deletions.
diff --git a/presidio/server/__main__.py b/presidio/server/__main__.py
@@ -1,197 +1,11 @@
 """REST API server for analyzer."""
 
-import logging
 import os
-from logging.config import fileConfig
-from pathlib import Path
-from typing import Tuple
 
-from flask import Flask, Response, jsonify, request
-from presidio_analyzer.analyzer_engine import AnalyzerEngine
-from presidio_analyzer.analyzer_request import AnalyzerRequest
-from presidio_analyzer.batch_analyzer_engine import BatchAnalyzerEngine
-from presidio_anonymizer import BatchAnonymizerEngine
-from werkzeug.exceptions import HTTPException
-
-from .helpers import convert_all_lists_to_dicts, extract_data_types_from_results
+from .server import Server
 
 DEFAULT_PORT = "3000"
 
-LOGGING_CONF_FILE = "logging.ini"
-
-WELCOME_MESSAGE = r"""
- _______  _______  _______  _______ _________ ______  _________ _______
-(  ____ )(  ____ )(  ____ \(  ____ \\__   __/(  __  \ \__   __/(  ___  )
-| (    )|| (    )|| (    \/| (    \/   ) (   | (  \  )   ) (   | (   ) |
-| (____)|| (____)|| (__    | (_____    | |   | |   ) |   | |   | |   | |
-|  _____)|     __)|  __)   (_____  )   | |   | |   | |   | |   | |   | |
-| (      | (\ (   | (            ) |   | |   | |   ) |   | |   | |   | |
-| )      | ) \ \__| (____/\/\____) |___) (___| (__/  )___) (___| (___) |
-|/       |/   \__/(_______/\_______)\_______/(______/ \_______/(_______)
-"""
-
-
-class Server:
-    """HTTP Server for calling Presidio Analyzer."""
-
-    def __init__(self):
-        fileConfig(Path(Path(__file__).parent, LOGGING_CONF_FILE))
-        self.logger = logging.getLogger("presidio-analyzer")
-        self.logger.setLevel(os.environ.get("LOG_LEVEL", self.logger.level))
-        self.app = Flask(__name__)
-        self.logger.info("Starting analyzer engine")
-        self.engine = AnalyzerEngine()
-        self.batch_analyzer = BatchAnalyzerEngine(analyzer_engine=self.engine)
-        self.batch_anonymizer = BatchAnonymizerEngine()
-        self.logger.info(WELCOME_MESSAGE)
-
-        @self.app.route("/health")
-        def health() -> str:
-            """Return basic health probe result."""
-            return "Presidio Analyzer service is up"
-
-        @self.app.route("/analyze", methods=["POST"])
-        def analyze() -> Tuple[Response, int]:
-            """Execute the analyzer function."""
-            # Parse the request params
-            try:
-                req_data = AnalyzerRequest(request.get_json())
-                if not req_data.text:
-                    raise Exception("No text provided")
-
-                if not req_data.language:
-                    raise Exception("No language provided")
-
-                recognizer_result_list = self.engine.analyze(
-                    text=req_data.text,
-                    language=req_data.language,
-                    correlation_id=req_data.correlation_id,
-                    score_threshold=req_data.score_threshold,
-                    entities=req_data.entities,
-                    return_decision_process=req_data.return_decision_process,
-                    ad_hoc_recognizers=req_data.ad_hoc_recognizers,
-                    context=req_data.context,
-                )
-
-                return jsonify(recognizer_result_list), 200
-            except TypeError as te:
-                error_msg = (
-                    f"Failed to parse /analyze request "
-                    f"for AnalyzerEngine.analyze(). {te.args[0]}"
-                )
-                self.logger.error(error_msg)
-                return jsonify(error=error_msg), 400
-
-            except Exception as e:
-                self.logger.error(
-                    f"A fatal error occurred during execution of "
-                    f"AnalyzerEngine.analyze(). {e}"
-                )
-                return jsonify(error=e.args[0]), 500
-
-        @self.app.route("/recognizers", methods=["GET"])
-        def recognizers() -> Tuple[Response, int]:
-            """Return a list of supported recognizers."""
-            language = request.args.get("language")
-            try:
-                recognizers_list = self.engine.get_recognizers(language)
-                names = [o.name for o in recognizers_list]
-                return jsonify(names), 200
-            except Exception as e:
-                self.logger.error(
-                    f"A fatal error occurred during execution of "
-                    f"AnalyzerEngine.get_recognizers(). {e}"
-                )
-                return jsonify(error=e.args[0]), 500
-
-        @self.app.route("/supportedentities", methods=["GET"])
-        def supported_entities() -> Tuple[Response, int]:
-            """Return a list of supported entities."""
-            language = request.args.get("language")
-            try:
-                entities_list = self.engine.get_supported_entities(language)
-                return jsonify(entities_list), 200
-            except Exception as e:
-                self.logger.error(
-                    f"A fatal error occurred during execution of "
-                    f"AnalyzerEngine.supported_entities(). {e}"
-                )
-                return jsonify(error=e.args[0]), 500
-
-        @self.app.errorhandler(HTTPException)
-        def http_exception(e):
-            return jsonify(error=e.description), e.code
-
-        @self.app.route("/batchanalyze", methods=["POST"])
-        def batch_analyze() -> Tuple[Response, int]:
-            """Execute the batch analyzer function."""
-            # Parse the request params
-            try:
-                request_obj = request.get_json()
-                print(request_obj["json_to_analyze"], type(request_obj))
-                if (
-                    "json_to_analyze" not in request_obj
-                    or request_obj["json_to_analyze"] is None
-                ):
-                    raise Exception(
-                        "Please set a JSON field named 'json_to_analyze' in the body, with the JSON object "
-                        "to analyze."
-                    )
-
-                # Note that this function implementation already adds the key as additional 'context'
-                # for the decision (see batch_analyzer_engine.py line 96)
-                recognizer_result_list = self.batch_analyzer.analyze_dict(
-                    input_dict=convert_all_lists_to_dicts(
-                        request_obj["json_to_analyze"]
-                    ),
-                    language="en",
-                )
-                print(recognizer_result_list)
-
-                unique_pii_list = extract_data_types_from_results(
-                    recognizer_result_list
-                )
-
-                unique_valid_pii_list = [
-                    pii for pii in unique_pii_list if pii in data_items_set
-                ]
-
-                return jsonify(unique_valid_pii_list), 200
-            except TypeError as te:
-                error_msg = (
-                    f"Failed to parse /batchanalyze request "
-                    f"for AnalyzerEngine.analyze(). {te.args[0]}"
-                )
-                self.logger.error(error_msg)
-                return jsonify(error=error_msg), 400
-
-            except Exception as e:
-                self.logger.error(
-                    f"A fatal error occurred during execution of "
-                    f"BatchAnalyzer.analyze_dict(). {e}"
-                )
-                return jsonify(error=e.args[0]), 500
-
-
-data_items_set = [
-    "CREDIT_CARD",
-    "NRP",
-    "US_ITIN",
-    "PERSON",
-    "US_BANK_NUMBER",
-    "US_PASSPORT",
-    "IP_ADDRESS",
-    "US_DRIVER_LICENSE",
-    "CRYPTO",
-    "URL",
-    "PHONE_NUMBER",
-    "IBAN_CODE",
-    "DATE_TIME",
-    "LOCATION",
-    "EMAIL_ADDRESS",
-    "US_SSN",
-]
-
 if __name__ == "__main__":
     port = int(os.environ.get("PORT", DEFAULT_PORT))
     server = Server()

diff --git a/presidio/server/server.py b/presidio/server/server.py
@@ -0,0 +1,190 @@
+"""REST API server for analyzer."""
+
+import logging
+import os
+from logging.config import fileConfig
+from pathlib import Path
+from typing import Tuple
+
+from flask import Flask, Response, jsonify, request
+from presidio_analyzer.analyzer_engine import AnalyzerEngine
+from presidio_analyzer.analyzer_request import AnalyzerRequest
+from presidio_analyzer.batch_analyzer_engine import BatchAnalyzerEngine
+from presidio_anonymizer import BatchAnonymizerEngine
+from werkzeug.exceptions import HTTPException
+
+from .helpers import convert_all_lists_to_dicts, extract_data_types_from_results
+
+data_items_set = [  # data types /batchanalyze may report; others are filtered out
+    "CREDIT_CARD",
+    "NRP",
+    "US_ITIN",
+    "PERSON",
+    "US_BANK_NUMBER",
+    "US_PASSPORT",
+    "IP_ADDRESS",
+    "US_DRIVER_LICENSE",
+    "CRYPTO",
+    "URL",
+    "PHONE_NUMBER",
+    "IBAN_CODE",
+    "DATE_TIME",
+    "LOCATION",
+    "EMAIL_ADDRESS",
+    "US_SSN",
+]  # NOTE: despite the name this is a list, not a set
+
+LOGGING_CONF_FILE = "logging.ini"  # resolved against this module's directory by Server
+
+WELCOME_MESSAGE = r"""
+ _______  _______  _______  _______ _________ ______  _________ _______
+(  ____ )(  ____ )(  ____ \(  ____ \\__   __/(  __  \ \__   __/(  ___  )
+| (    )|| (    )|| (    \/| (    \/   ) (   | (  \  )   ) (   | (   ) |
+| (____)|| (____)|| (__    | (_____    | |   | |   ) |   | |   | |   | |
+|  _____)|     __)|  __)   (_____  )   | |   | |   | |   | |   | |   | |
+| (      | (\ (   | (            ) |   | |   | |   ) |   | |   | |   | |
+| )      | ) \ \__| (____/\/\____) |___) (___| (__/  )___) (___| (___) |
+|/       |/   \__/(_______/\_______)\_______/(______/ \_______/(_______)
+"""
+
+
+class Server:
+    """HTTP Server for calling Presidio Analyzer."""
+
+    def __init__(self):
+        """Set up logging and the engines, then register all routes on self.app."""
+        fileConfig(Path(Path(__file__).parent, LOGGING_CONF_FILE))
+        self.logger = logging.getLogger("presidio-analyzer")
+        self.logger.setLevel(os.environ.get("LOG_LEVEL", self.logger.level))
+        self.app = Flask(__name__)
+        self.logger.info("Starting analyzer engine")
+        self.engine = AnalyzerEngine()
+        self.batch_analyzer = BatchAnalyzerEngine(analyzer_engine=self.engine)
+        self.batch_anonymizer = BatchAnonymizerEngine()
+        self.logger.info(WELCOME_MESSAGE)
+
+        @self.app.route("/health")
+        def health() -> str:
+            """Return basic health probe result."""
+            return "Presidio Analyzer service is up"
+
+        @self.app.route("/analyze", methods=["POST"])
+        def analyze() -> Tuple[Response, int]:
+            """Analyze the posted text; TypeError maps to 400, other errors to 500."""
+            try:
+                req_data = AnalyzerRequest(request.get_json())
+                if not req_data.text:
+                    raise Exception("No text provided")
+
+                if not req_data.language:
+                    raise Exception("No language provided")
+
+                recognizer_result_list = self.engine.analyze(
+                    text=req_data.text,
+                    language=req_data.language,
+                    correlation_id=req_data.correlation_id,
+                    score_threshold=req_data.score_threshold,
+                    entities=req_data.entities,
+                    return_decision_process=req_data.return_decision_process,
+                    ad_hoc_recognizers=req_data.ad_hoc_recognizers,
+                    context=req_data.context,
+                )
+
+                return jsonify(recognizer_result_list), 200
+            except HTTPException:  # e.g. malformed JSON; answered by http_exception
+                raise
+            except TypeError as te:
+                error_msg = (
+                    f"Failed to parse /analyze request "
+                    f"for AnalyzerEngine.analyze(). {te}"
+                )
+                self.logger.error(error_msg)
+                return jsonify(error=error_msg), 400
+            except Exception as e:
+                self.logger.error(
+                    f"A fatal error occurred during execution of "
+                    f"AnalyzerEngine.analyze(). {e}"
+                )
+                return jsonify(error=e.args[0] if e.args else str(e)), 500
+
+        @self.app.route("/recognizers", methods=["GET"])
+        def recognizers() -> Tuple[Response, int]:
+            """Return a list of supported recognizers."""
+            language = request.args.get("language")
+            try:
+                recognizers_list = self.engine.get_recognizers(language)
+                names = [o.name for o in recognizers_list]
+                return jsonify(names), 200
+            except Exception as e:
+                self.logger.error(
+                    f"A fatal error occurred during execution of "
+                    f"AnalyzerEngine.get_recognizers(). {e}"
+                )
+                return jsonify(error=e.args[0] if e.args else str(e)), 500
+
+        @self.app.route("/supportedentities", methods=["GET"])
+        def supported_entities() -> Tuple[Response, int]:
+            """Return a list of supported entities."""
+            language = request.args.get("language")
+            try:
+                entities_list = self.engine.get_supported_entities(language)
+                return jsonify(entities_list), 200
+            except Exception as e:
+                self.logger.error(
+                    f"A fatal error occurred during execution of "
+                    f"AnalyzerEngine.supported_entities(). {e}"
+                )
+                return jsonify(error=e.args[0] if e.args else str(e)), 500
+
+        @self.app.errorhandler(HTTPException)
+        def http_exception(e):
+            """Render any HTTPException as a JSON error with its own status code."""
+            return jsonify(error=e.description), e.code
+
+        @self.app.route("/batchanalyze", methods=["POST"])
+        def batch_analyze() -> Tuple[Response, int]:
+            """Return the data types found in the posted 'json_to_analyze' object."""
+            try:
+                request_obj = request.get_json()
+                if not isinstance(request_obj, dict):
+                    # Keeps non-object bodies on the TypeError -> 400 path below.
+                    raise TypeError("The request body must be a JSON object")
+
+                # Validate before touching the field so a missing key yields the
+                # explanatory message below instead of a bare KeyError.
+                json_to_analyze = request_obj.get("json_to_analyze")
+                if json_to_analyze is None:
+                    raise Exception(
+                        "Please set a JSON field named 'json_to_analyze' in the body, with the JSON object "
+                        "to analyze."
+                    )
+
+                # Note that this function implementation already adds the key as additional 'context'
+                # for the decision (see batch_analyzer_engine.py line 96)
+                results = self.batch_analyzer.analyze_dict(
+                    input_dict=convert_all_lists_to_dicts(json_to_analyze),
+                    language="en",
+                )
+
+                detected_types = extract_data_types_from_results(results)
+                # Log data types only: the payload and raw results may contain PII.
+                self.logger.debug(f"Detected data types: {detected_types}")
+                # Report only the data types listed in data_items_set.
+                reported_types = [t for t in detected_types if t in data_items_set]
+
+                return jsonify(reported_types), 200
+            except HTTPException:  # e.g. malformed JSON; answered by http_exception
+                raise
+            except TypeError as te:
+                error_msg = (
+                    f"Failed to parse /batchanalyze request "
+                    f"for AnalyzerEngine.analyze(). {te}"
+                )
+                self.logger.error(error_msg)
+                return jsonify(error=error_msg), 400
+            except Exception as e:
+                self.logger.error(
+                    f"A fatal error occurred during execution of "
+                    f"BatchAnalyzer.analyze_dict(). {e}"
+                )
+                return jsonify(error=e.args[0] if e.args else str(e)), 500