Skip to content

Commit

Permalink
refactor(presidio): extract Server into separate file
Browse files Browse the repository at this point in the history
  • Loading branch information
qlonik committed Apr 28, 2024
1 parent 521fec3 commit 51583a6
Show file tree
Hide file tree
Showing 2 changed files with 191 additions and 187 deletions.
188 changes: 1 addition & 187 deletions presidio/server/__main__.py
Original file line number Diff line number Diff line change
@@ -1,197 +1,11 @@
"""REST API server for analyzer."""

import logging
import os
from logging.config import fileConfig
from pathlib import Path
from typing import Tuple

from flask import Flask, Response, jsonify, request
from presidio_analyzer.analyzer_engine import AnalyzerEngine
from presidio_analyzer.analyzer_request import AnalyzerRequest
from presidio_analyzer.batch_analyzer_engine import BatchAnalyzerEngine
from presidio_anonymizer import BatchAnonymizerEngine
from werkzeug.exceptions import HTTPException

from .helpers import convert_all_lists_to_dicts, extract_data_types_from_results
from .server import Server

# Port used when the PORT environment variable is not set. Kept as a string
# because it is the fallback for os.environ.get() and is passed through int().
DEFAULT_PORT = "3000"

# File name of the logging configuration, resolved relative to this module's
# directory when the server is constructed.
LOGGING_CONF_FILE = "logging.ini"

# ASCII-art banner written to the log once the engines have been created.
WELCOME_MESSAGE = r"""
_______ _______ _______ _______ _________ ______ _________ _______
( ____ )( ____ )( ____ \( ____ \\__ __/( __ \ \__ __/( ___ )
| ( )|| ( )|| ( \/| ( \/ ) ( | ( \ ) ) ( | ( ) |
| (____)|| (____)|| (__ | (_____ | | | | ) | | | | | | |
| _____)| __)| __) (_____ ) | | | | | | | | | | | |
| ( | (\ ( | ( ) | | | | | ) | | | | | | |
| ) | ) \ \__| (____/\/\____) |___) (___| (__/ )___) (___| (___) |
|/ |/ \__/(_______/\_______)\_______/(______/ \_______/(_______)
"""


class Server:
    """HTTP server exposing Presidio Analyzer over a small REST API.

    Constructing an instance loads the logging configuration named by
    ``LOGGING_CONF_FILE`` from this module's directory, creates the analyzer
    engines and registers the following routes on ``self.app``:

    * ``GET  /health``            - liveness probe
    * ``POST /analyze``           - analyze a single text
    * ``GET  /recognizers``       - names of the loaded recognizers
    * ``GET  /supportedentities`` - supported entity types
    * ``POST /batchanalyze``      - analyze a JSON document

    Attributes:
        logger: The ``presidio-analyzer`` logger; its level can be overridden
            with the ``LOG_LEVEL`` environment variable.
        app: The Flask application holding the routes.
        engine: The ``AnalyzerEngine`` used by the single-text endpoints.
        batch_analyzer: ``BatchAnalyzerEngine`` wrapping ``engine``.
        batch_anonymizer: A ``BatchAnonymizerEngine`` instance.
    """

    def __init__(self):
        """Configure logging, build the engines and register all routes."""
        fileConfig(Path(Path(__file__).parent, LOGGING_CONF_FILE))
        self.logger = logging.getLogger("presidio-analyzer")
        self.logger.setLevel(os.environ.get("LOG_LEVEL", self.logger.level))
        self.app = Flask(__name__)
        self.logger.info("Starting analyzer engine")
        self.engine = AnalyzerEngine()
        self.batch_analyzer = BatchAnalyzerEngine(analyzer_engine=self.engine)
        self.batch_anonymizer = BatchAnonymizerEngine()
        self.logger.info(WELCOME_MESSAGE)

        @self.app.route("/health")
        def health() -> str:
            """Return basic health probe result."""
            return "Presidio Analyzer service is up"

        @self.app.route("/analyze", methods=["POST"])
        def analyze() -> Tuple[Response, int]:
            """Execute the analyzer function.

            Responds with 200 and the recognizer results, 400 when the
            request cannot be mapped onto ``AnalyzerEngine.analyze()``
            (``TypeError``), or 500 for any other failure, including a
            missing ``text`` or ``language`` field.
            """
            try:
                # Parse the request params
                req_data = AnalyzerRequest(request.get_json())
                if not req_data.text:
                    raise Exception("No text provided")

                if not req_data.language:
                    raise Exception("No language provided")

                recognizer_result_list = self.engine.analyze(
                    text=req_data.text,
                    language=req_data.language,
                    correlation_id=req_data.correlation_id,
                    score_threshold=req_data.score_threshold,
                    entities=req_data.entities,
                    return_decision_process=req_data.return_decision_process,
                    ad_hoc_recognizers=req_data.ad_hoc_recognizers,
                    context=req_data.context,
                )

                return jsonify(recognizer_result_list), 200
            except TypeError as te:
                # Formatting the exception itself (rather than args[0]) is
                # safe even when the exception carries no arguments.
                error_msg = (
                    f"Failed to parse /analyze request "
                    f"for AnalyzerEngine.analyze(). {te}"
                )
                self.logger.error(error_msg)
                return jsonify(error=error_msg), 400
            except HTTPException:
                # Let the registered HTTPException handler build the
                # response (e.g. for a malformed JSON body).
                raise
            except Exception as e:
                self.logger.error(
                    f"A fatal error occurred during execution of "
                    f"AnalyzerEngine.analyze(). {e}"
                )
                return jsonify(error=str(e)), 500

        @self.app.route("/recognizers", methods=["GET"])
        def recognizers() -> Tuple[Response, int]:
            """Return a list of supported recognizers.

            The optional ``language`` query parameter is forwarded to
            ``AnalyzerEngine.get_recognizers()``.
            """
            language = request.args.get("language")
            try:
                recognizers_list = self.engine.get_recognizers(language)
                names = [o.name for o in recognizers_list]
                return jsonify(names), 200
            except Exception as e:
                self.logger.error(
                    f"A fatal error occurred during execution of "
                    f"AnalyzerEngine.get_recognizers(). {e}"
                )
                return jsonify(error=str(e)), 500

        @self.app.route("/supportedentities", methods=["GET"])
        def supported_entities() -> Tuple[Response, int]:
            """Return a list of supported entities.

            The optional ``language`` query parameter is forwarded to
            ``AnalyzerEngine.get_supported_entities()``.
            """
            language = request.args.get("language")
            try:
                entities_list = self.engine.get_supported_entities(language)
                return jsonify(entities_list), 200
            except Exception as e:
                self.logger.error(
                    f"A fatal error occurred during execution of "
                    f"AnalyzerEngine.supported_entities(). {e}"
                )
                return jsonify(error=str(e)), 500

        @self.app.errorhandler(HTTPException)
        def http_exception(e):
            """Render any HTTPException as a JSON error with its own code."""
            return jsonify(error=e.description), e.code

        @self.app.route("/batchanalyze", methods=["POST"])
        def batch_analyze() -> Tuple[Response, int]:
            """Execute the batch analyzer function.

            Expects a JSON object with a ``json_to_analyze`` field. Responds
            with 200 and the detected entity types that also appear in
            ``data_items_set``, 400 when the body is not a JSON object or
            the analysis raises ``TypeError``, or 500 for any other failure,
            including a missing ``json_to_analyze`` field.
            """
            try:
                # Parse the request params
                request_obj = request.get_json()
                if not isinstance(request_obj, dict):
                    raise TypeError("Request body must be a JSON object")

                # Validate before touching the field so that a missing key
                # yields the explanatory message instead of a bare KeyError.
                # The payload is deliberately not printed or logged: it is
                # the data being screened for PII.
                if request_obj.get("json_to_analyze") is None:
                    raise Exception(
                        "Please set a JSON field named 'json_to_analyze' in the body, with the JSON object "
                        "to analyze."
                    )

                # Note that this function implementation already adds the key as additional 'context'
                # for the decision (see batch_analyzer_engine.py line 96)
                recognizer_result_list = self.batch_analyzer.analyze_dict(
                    input_dict=convert_all_lists_to_dicts(
                        request_obj["json_to_analyze"]
                    ),
                    language="en",
                )

                unique_pii_list = extract_data_types_from_results(
                    recognizer_result_list
                )

                # Only report entity types from the module-level allow-list.
                unique_valid_pii_list = [
                    pii for pii in unique_pii_list if pii in data_items_set
                ]

                return jsonify(unique_valid_pii_list), 200
            except TypeError as te:
                error_msg = (
                    f"Failed to parse /batchanalyze request "
                    f"for AnalyzerEngine.analyze(). {te}"
                )
                self.logger.error(error_msg)
                return jsonify(error=error_msg), 400
            except HTTPException:
                # Let the registered HTTPException handler build the
                # response (e.g. for a malformed JSON body).
                raise
            except Exception as e:
                self.logger.error(
                    f"A fatal error occurred during execution of "
                    f"BatchAnalyzer.analyze_dict(). {e}"
                )
                return jsonify(error=str(e)), 500


# Allow-list of entity type names: /batchanalyze only returns detected types
# that appear here. NOTE: despite the name this is a list, not a set.
data_items_set = [
    "CREDIT_CARD",
    "NRP",
    "US_ITIN",
    "PERSON",
    "US_BANK_NUMBER",
    "US_PASSPORT",
    "IP_ADDRESS",
    "US_DRIVER_LICENSE",
    "CRYPTO",
    "URL",
    "PHONE_NUMBER",
    "IBAN_CODE",
    "DATE_TIME",
    "LOCATION",
    "EMAIL_ADDRESS",
    "US_SSN",
]

if __name__ == "__main__":
port = int(os.environ.get("PORT", DEFAULT_PORT))
server = Server()
Expand Down
190 changes: 190 additions & 0 deletions presidio/server/server.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
"""REST API server for analyzer."""

import logging
import os
from logging.config import fileConfig
from pathlib import Path
from typing import Tuple

from flask import Flask, Response, jsonify, request
from presidio_analyzer.analyzer_engine import AnalyzerEngine
from presidio_analyzer.analyzer_request import AnalyzerRequest
from presidio_analyzer.batch_analyzer_engine import BatchAnalyzerEngine
from presidio_anonymizer import BatchAnonymizerEngine
from werkzeug.exceptions import HTTPException

from .helpers import convert_all_lists_to_dicts, extract_data_types_from_results

# Allow-list of entity type names: /batchanalyze only returns detected types
# that appear here. NOTE: despite the name this is a list, not a set.
data_items_set = [
    "CREDIT_CARD",
    "NRP",
    "US_ITIN",
    "PERSON",
    "US_BANK_NUMBER",
    "US_PASSPORT",
    "IP_ADDRESS",
    "US_DRIVER_LICENSE",
    "CRYPTO",
    "URL",
    "PHONE_NUMBER",
    "IBAN_CODE",
    "DATE_TIME",
    "LOCATION",
    "EMAIL_ADDRESS",
    "US_SSN",
]

# File name of the logging configuration, resolved relative to this module's
# directory when the server is constructed.
LOGGING_CONF_FILE = "logging.ini"

# ASCII-art banner written to the log once the engines have been created.
WELCOME_MESSAGE = r"""
_______ _______ _______ _______ _________ ______ _________ _______
( ____ )( ____ )( ____ \( ____ \\__ __/( __ \ \__ __/( ___ )
| ( )|| ( )|| ( \/| ( \/ ) ( | ( \ ) ) ( | ( ) |
| (____)|| (____)|| (__ | (_____ | | | | ) | | | | | | |
| _____)| __)| __) (_____ ) | | | | | | | | | | | |
| ( | (\ ( | ( ) | | | | | ) | | | | | | |
| ) | ) \ \__| (____/\/\____) |___) (___| (__/ )___) (___| (___) |
|/ |/ \__/(_______/\_______)\_______/(______/ \_______/(_______)
"""


class Server:
    """HTTP server exposing Presidio Analyzer over a small REST API.

    Constructing an instance loads the logging configuration named by
    ``LOGGING_CONF_FILE`` from this module's directory, creates the analyzer
    engines and registers the following routes on ``self.app``:

    * ``GET  /health``            - liveness probe
    * ``POST /analyze``           - analyze a single text
    * ``GET  /recognizers``       - names of the loaded recognizers
    * ``GET  /supportedentities`` - supported entity types
    * ``POST /batchanalyze``      - analyze a JSON document

    Attributes:
        logger: The ``presidio-analyzer`` logger; its level can be overridden
            with the ``LOG_LEVEL`` environment variable.
        app: The Flask application holding the routes.
        engine: The ``AnalyzerEngine`` used by the single-text endpoints.
        batch_analyzer: ``BatchAnalyzerEngine`` wrapping ``engine``.
        batch_anonymizer: A ``BatchAnonymizerEngine`` instance.
    """

    def __init__(self):
        """Configure logging, build the engines and register all routes."""
        fileConfig(Path(Path(__file__).parent, LOGGING_CONF_FILE))
        self.logger = logging.getLogger("presidio-analyzer")
        self.logger.setLevel(os.environ.get("LOG_LEVEL", self.logger.level))
        self.app = Flask(__name__)
        self.logger.info("Starting analyzer engine")
        self.engine = AnalyzerEngine()
        self.batch_analyzer = BatchAnalyzerEngine(analyzer_engine=self.engine)
        self.batch_anonymizer = BatchAnonymizerEngine()
        self.logger.info(WELCOME_MESSAGE)

        @self.app.route("/health")
        def health() -> str:
            """Return basic health probe result."""
            return "Presidio Analyzer service is up"

        @self.app.route("/analyze", methods=["POST"])
        def analyze() -> Tuple[Response, int]:
            """Execute the analyzer function.

            Responds with 200 and the recognizer results, 400 when the
            request cannot be mapped onto ``AnalyzerEngine.analyze()``
            (``TypeError``), or 500 for any other failure, including a
            missing ``text`` or ``language`` field.
            """
            try:
                # Parse the request params
                req_data = AnalyzerRequest(request.get_json())
                if not req_data.text:
                    raise Exception("No text provided")

                if not req_data.language:
                    raise Exception("No language provided")

                recognizer_result_list = self.engine.analyze(
                    text=req_data.text,
                    language=req_data.language,
                    correlation_id=req_data.correlation_id,
                    score_threshold=req_data.score_threshold,
                    entities=req_data.entities,
                    return_decision_process=req_data.return_decision_process,
                    ad_hoc_recognizers=req_data.ad_hoc_recognizers,
                    context=req_data.context,
                )

                return jsonify(recognizer_result_list), 200
            except TypeError as te:
                # Formatting the exception itself (rather than args[0]) is
                # safe even when the exception carries no arguments.
                error_msg = (
                    f"Failed to parse /analyze request "
                    f"for AnalyzerEngine.analyze(). {te}"
                )
                self.logger.error(error_msg)
                return jsonify(error=error_msg), 400
            except HTTPException:
                # Let the registered HTTPException handler build the
                # response (e.g. for a malformed JSON body).
                raise
            except Exception as e:
                self.logger.error(
                    f"A fatal error occurred during execution of "
                    f"AnalyzerEngine.analyze(). {e}"
                )
                return jsonify(error=str(e)), 500

        @self.app.route("/recognizers", methods=["GET"])
        def recognizers() -> Tuple[Response, int]:
            """Return a list of supported recognizers.

            The optional ``language`` query parameter is forwarded to
            ``AnalyzerEngine.get_recognizers()``.
            """
            language = request.args.get("language")
            try:
                recognizers_list = self.engine.get_recognizers(language)
                names = [o.name for o in recognizers_list]
                return jsonify(names), 200
            except Exception as e:
                self.logger.error(
                    f"A fatal error occurred during execution of "
                    f"AnalyzerEngine.get_recognizers(). {e}"
                )
                return jsonify(error=str(e)), 500

        @self.app.route("/supportedentities", methods=["GET"])
        def supported_entities() -> Tuple[Response, int]:
            """Return a list of supported entities.

            The optional ``language`` query parameter is forwarded to
            ``AnalyzerEngine.get_supported_entities()``.
            """
            language = request.args.get("language")
            try:
                entities_list = self.engine.get_supported_entities(language)
                return jsonify(entities_list), 200
            except Exception as e:
                self.logger.error(
                    f"A fatal error occurred during execution of "
                    f"AnalyzerEngine.supported_entities(). {e}"
                )
                return jsonify(error=str(e)), 500

        @self.app.errorhandler(HTTPException)
        def http_exception(e):
            """Render any HTTPException as a JSON error with its own code."""
            return jsonify(error=e.description), e.code

        @self.app.route("/batchanalyze", methods=["POST"])
        def batch_analyze() -> Tuple[Response, int]:
            """Execute the batch analyzer function.

            Expects a JSON object with a ``json_to_analyze`` field. Responds
            with 200 and the detected entity types that also appear in
            ``data_items_set``, 400 when the body is not a JSON object or
            the analysis raises ``TypeError``, or 500 for any other failure,
            including a missing ``json_to_analyze`` field.
            """
            try:
                # Parse the request params
                request_obj = request.get_json()
                if not isinstance(request_obj, dict):
                    raise TypeError("Request body must be a JSON object")

                # Validate before touching the field so that a missing key
                # yields the explanatory message instead of a bare KeyError.
                # The payload is deliberately not printed or logged: it is
                # the data being screened for PII.
                if request_obj.get("json_to_analyze") is None:
                    raise Exception(
                        "Please set a JSON field named 'json_to_analyze' in the body, with the JSON object "
                        "to analyze."
                    )

                # Note that this function implementation already adds the key as additional 'context'
                # for the decision (see batch_analyzer_engine.py line 96)
                recognizer_result_list = self.batch_analyzer.analyze_dict(
                    input_dict=convert_all_lists_to_dicts(
                        request_obj["json_to_analyze"]
                    ),
                    language="en",
                )

                unique_pii_list = extract_data_types_from_results(
                    recognizer_result_list
                )

                # Only report entity types from the module-level allow-list.
                unique_valid_pii_list = [
                    pii for pii in unique_pii_list if pii in data_items_set
                ]

                return jsonify(unique_valid_pii_list), 200
            except TypeError as te:
                error_msg = (
                    f"Failed to parse /batchanalyze request "
                    f"for AnalyzerEngine.analyze(). {te}"
                )
                self.logger.error(error_msg)
                return jsonify(error=error_msg), 400
            except HTTPException:
                # Let the registered HTTPException handler build the
                # response (e.g. for a malformed JSON body).
                raise
            except Exception as e:
                self.logger.error(
                    f"A fatal error occurred during execution of "
                    f"BatchAnalyzer.analyze_dict(). {e}"
                )
                return jsonify(error=str(e)), 500

0 comments on commit 51583a6

Please sign in to comment.