From d63760709ede817ba7a5fd6fedae7ad8392438a1 Mon Sep 17 00:00:00 2001 From: Morten Brekkevold Date: Fri, 20 Dec 2024 15:45:30 +0000 Subject: [PATCH 1/3] Test prevention of error mail in Graphite proxy This adds a bit of complicated regression testing of #3259, which causes Dango to send error e-mail to NAV site admins when the proxied response code from Graphite-web is in the 5XX range. --- tests/integration/web/graphite_test.py | 127 +++++++++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 tests/integration/web/graphite_test.py diff --git a/tests/integration/web/graphite_test.py b/tests/integration/web/graphite_test.py new file mode 100644 index 0000000000..1833b0a1b8 --- /dev/null +++ b/tests/integration/web/graphite_test.py @@ -0,0 +1,127 @@ +# +# Copyright (C) 2024 Sikt +# +# This file is part of Network Administration Visualized (NAV). +# +# NAV is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License version 3 as published by +# the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. You should have received a copy of the GNU General Public +# License along with NAV. If not, see . +# +import http.server +import socketserver +import threading +import time + +import nav.web.graphite.views +import pytest +import requests + + +def test_graphite_proxy_should_not_log_503_error_from_graphite_web( + client, mock_graphite_config, fake_graphite_web_server, capsys, monkeypatch +): + from django.conf import settings + + url = "/graphite/render/?width=815&height=364&_salt=1400078618.531&from=-1hours&target=alias%28sumSeries%28group%28carbon.agents.%2A.metricsReceived%29%29%2C%22Metrics+received%22%29&target=alias%28sumSeries%28group%28carbon.agents.%2A.committedPoints%29%29%2C%22Committed+points%22%29&target=alias%28secondYAxis%28sumSeries%28group%28carbon.agents.%2A.cache.size%29%29%29%2C%22Cache+size%22%29" + monkeypatch.setattr( + settings, "EMAIL_BACKEND", "django.core.mail.backends.console.EmailBackend" + ) + monkeypatch.setattr(settings, "DEBUG", False) + client.get(url) + captured = capsys.readouterr() + assert "[Django] ERROR" not in captured.out + assert "Service Unavailable" not in captured.out + + +def test_graphite_proxy_should_respond_with_graphite_web_status_code( + client, mock_graphite_config, fake_graphite_web_server +): + url = "/graphite/render/?width=815&height=364&_salt=1400078618.531&from=-1hours&target=alias%28sumSeries%28group%28carbon.agents.%2A.metricsReceived%29%29%2C%22Metrics+received%22%29&target=alias%28sumSeries%28group%28carbon.agents.%2A.committedPoints%29%29%2C%22Committed+points%22%29&target=alias%28secondYAxis%28sumSeries%28group%28carbon.agents.%2A.cache.size%29%29%29%2C%22Cache+size%22%29" + response = client.get(url) + assert response.status_code == 503 + + +def test_fake_graphite_server_should_respond_with_503_error(fake_graphite_web_server): + response = requests.get(f"http://localhost:{fake_graphite_web_server}") + assert response.status_code == 503 + + +# +# Helpers and fixtures +# + + +@pytest.fixture(scope="module") +def fake_graphite_web_server(): + """A fixture that starts a fake graphite web server that always responds with a + 503 status code. The fixture returns the localhost port number the server + listens to. + """ + port = 54321 + response_code = 503 # Example response code + + handler = lambda *args, **kwargs: SingleStatusHandler( + *args, response_code=response_code, **kwargs + ) + httpd = socketserver.TCPServer(("", port), handler) + + thread = threading.Thread(target=httpd.serve_forever) + thread.daemon = True + thread.start() + time.sleep(1) # Give the server a second to ensure it starts + + yield port + + httpd.shutdown() + thread.join() + + +@pytest.fixture +def mock_graphite_config(fake_graphite_web_server, monkeypatch): + def mock_getter(*args, **kwargs): + return f"http://localhost:{fake_graphite_web_server}/" + + monkeypatch.setattr(nav.web.graphite.views.CONFIG, "get", mock_getter) + yield monkeypatch + + +class SingleStatusHandler(http.server.SimpleHTTPRequestHandler): + """A request handler that responds to all requests with the same response code + and dummy content (for testing response code handling) + """ + + def __init__(self, *args, **kwargs): + self.response_code = kwargs.pop("response_code", 200) + super().__init__(*args, **kwargs) + + def do_GET(self): + self.send_response(self.response_code) + self.send_header("Content-type", "text/html") + self.end_headers() + self.wfile.write(b"Test server response") + + def do_POST(self): + self.send_response(self.response_code) + self.send_header("Content-type", "text/html") + self.end_headers() + self.wfile.write(b"Test server response") + + +def main(): + """Main function for manual test usage of the fake server""" + port = 54321 + handler = lambda *args, **kwargs: SingleStatusHandler( + *args, response_code=503, **kwargs + ) + httpd = socketserver.TCPServer(("", port), handler) + httpd.serve_forever() + + +if __name__ == "__main__": + main() From 7e663d0b0b56ac9d95be01bc3974ff830f903bba Mon Sep 17 00:00:00 2001 From: Morten Brekkevold Date: Fri, 20 Dec 2024 14:38:34 +0000 Subject: [PATCH 2/3] Stop logging proxied 5XX Graphite errors This adds what amounts to a dirty hack to the graphite-web proxy view (i.e. it reaches into Django internals) to prevent Django from doing a full error response log when proxying 5xx class errors from graphite-web. It does so by tricking `django.utils.log.log_response()` into thinking that the response has already been logged (since our view function has no other control over how its return values are logged by Django). The reason for the 5xx response codes are not bugs in our view code, and does not need to trigger site admin e-mails. --- python/nav/web/graphite/views.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/python/nav/web/graphite/views.py b/python/nav/web/graphite/views.py index 409c9cc293..76740e1e3f 100644 --- a/python/nav/web/graphite/views.py +++ b/python/nav/web/graphite/views.py @@ -73,6 +73,14 @@ def index(request, uri): else: response = HttpResponse(output, content_type=content_type, status=status) + # Dirty hack to prevent logging of 5XX errors. More specifically, this prevents + # a full Django error mail from being sent when Graphite is down or otherwise + # failing. (I.e. we're just proxying the status code, there is no error in our + # application that needs to be mailed to anyone). + if status >= 500: + # Tricks Django into thinking this response has already been logged: + response._has_been_logged = True + response['X-Where-Am-I'] = request.get_full_path() return response From 20aa2159714baeb8087e8ae22c42189886467ffe Mon Sep 17 00:00:00 2001 From: Morten Brekkevold Date: Fri, 20 Dec 2024 15:47:08 +0000 Subject: [PATCH 3/3] Add news fragment --- changelog.d/3259.fixed.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/3259.fixed.md diff --git a/changelog.d/3259.fixed.md b/changelog.d/3259.fixed.md new file mode 100644 index 0000000000..c920904d95 --- /dev/null +++ b/changelog.d/3259.fixed.md @@ -0,0 +1 @@ +Stop sending error mails to site admins from graphite-web proxy endpoint when graphite response code is in the 5XX range