Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add max three app name status for agent control health check #1291

Merged
merged 6 commits into from
Jan 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 15 additions & 6 deletions newrelic/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,17 @@
from newrelic.common.log_file import initialize_logging
from newrelic.common.object_names import callable_name, expand_builtin_exception_name
from newrelic.core import trace_cache
from newrelic.core.agent_control_health import (
HealthStatus,
agent_control_health_instance,
agent_control_healthcheck_loop,
)
from newrelic.core.config import (
Settings,
apply_config_setting,
default_host,
fetch_config_setting,
)
from newrelic.core.agent_control_health import HealthStatus, agent_control_health_instance, agent_control_healthcheck_loop


__all__ = ["initialize", "filter_app_factory"]

Expand Down Expand Up @@ -597,12 +600,16 @@ def _process_app_name_setting():
# primary application name and link it with the other applications.
# When activating the application the linked names will be sent
# along to the core application where the association will be
# created if the do not exist.
# created if it does not exist.

name = _settings.app_name.split(";")[0].strip() or "Python Application"
app_name_list = _settings.app_name.split(";")
name = app_name_list[0].strip() or "Python Application"

if len(app_name_list) > 3:
agent_control_health.set_health_status(HealthStatus.MAX_APP_NAME.value)

linked = []
for altname in _settings.app_name.split(";")[1:]:
for altname in app_name_list[1:]:
altname = altname.strip()
if altname:
linked.append(altname)
Expand Down Expand Up @@ -4833,7 +4840,9 @@ def _setup_agent_console():
newrelic.core.agent.Agent.run_on_startup(_startup_agent_console)


agent_control_health_thread = threading.Thread(name="Agent-Control-Health-Main-Thread", target=agent_control_healthcheck_loop)
agent_control_health_thread = threading.Thread(
name="Agent-Control-Health-Main-Thread", target=agent_control_healthcheck_loop
)
agent_control_health_thread.daemon = True


Expand Down
11 changes: 9 additions & 2 deletions newrelic/core/agent_control_health.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
from enum import IntEnum
from pathlib import Path
from urllib.parse import urlparse
from newrelic.core.config import _environ_as_bool, _environ_as_int

from newrelic.core.config import _environ_as_bool, _environ_as_int

_logger = logging.getLogger(__name__)

Expand All @@ -33,6 +33,7 @@ class HealthStatus(IntEnum):
MISSING_LICENSE = 2
FORCED_DISCONNECT = 3
HTTP_ERROR = 4
MAX_APP_NAME = 6
PROXY_ERROR = 7
AGENT_DISABLED = 8
FAILED_NR_CONNECTION = 9
Expand All @@ -47,6 +48,7 @@ class HealthStatus(IntEnum):
HealthStatus.MISSING_LICENSE.value: "License key missing in configuration",
HealthStatus.FORCED_DISCONNECT.value: "Forced disconnect received from New Relic (HTTP status code 410)",
HealthStatus.HTTP_ERROR.value: "HTTP error response code {response_code} received from New Relic while sending data type {info}",
HealthStatus.MAX_APP_NAME.value: "The maximum number of configured app names (3) exceeded",
HealthStatus.PROXY_ERROR.value: "HTTP Proxy configuration error; response code {response_code}",
HealthStatus.AGENT_DISABLED.value: "Agent is disabled via configuration",
HealthStatus.FAILED_NR_CONNECTION.value: "Failed to connect to New Relic data collector",
Expand All @@ -61,6 +63,8 @@ class HealthStatus(IntEnum):
)
LICENSE_KEY_ERROR_CODES = frozenset([HealthStatus.INVALID_LICENSE.value, HealthStatus.MISSING_LICENSE.value])

NR_CONNECTION_ERROR_CODES = frozenset([HealthStatus.FAILED_NR_CONNECTION.value, HealthStatus.FORCED_DISCONNECT.value])


def is_valid_file_delivery_location(file_uri):
# Verify whether file directory provided to agent via env var is a valid file URI to determine whether health
Expand Down Expand Up @@ -150,7 +154,10 @@ def set_health_status(self, status_code, response_code=None, info=None):
previous_status_code = self.status_code

if status_code == HealthStatus.FAILED_NR_CONNECTION.value and previous_status_code in LICENSE_KEY_ERROR_CODES:
# Do not update to failed connection status when license key is the issue
# Do not update to failed connection status when license key is the issue so the more descriptive status is not overridden
return
elif status_code in NR_CONNECTION_ERROR_CODES and previous_status_code == HealthStatus.MAX_APP_NAME:
# Do not let NR connection error override the max app name status
return
elif status_code == HealthStatus.AGENT_SHUTDOWN.value and not self.is_healthy:
# Do not override status with agent_shutdown unless the agent was previously healthy
Expand Down
69 changes: 50 additions & 19 deletions tests/agent_features/test_agent_control_health_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,23 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import time
import re
import pytest
import threading
import time

from newrelic.core.config import finalize_application_settings
import pytest
from testing_support.fixtures import initialize_agent
from testing_support.http_client_recorder import HttpClientRecorder
from newrelic.core.agent_control_health import HealthStatus, is_valid_file_delivery_location, agent_control_health_instance
from newrelic.config import initialize, _reset_configuration_done

from newrelic.config import _reset_configuration_done, initialize
from newrelic.core.agent_control_health import (
HealthStatus,
agent_control_health_instance,
is_valid_file_delivery_location,
)
from newrelic.core.agent_protocol import AgentProtocol
from newrelic.core.application import Application
from newrelic.core.config import finalize_application_settings, global_settings
from newrelic.network.exceptions import DiscardDataForRequest


Expand All @@ -34,6 +40,7 @@ def get_health_file_contents(tmp_path):
contents = f.readlines()
return contents


@pytest.mark.parametrize("file_uri", ["", "file://", "/test/dir", "foo:/test/dir"])
def test_invalid_file_directory_supplied(file_uri):
assert not is_valid_file_delivery_location(file_uri)
Expand Down Expand Up @@ -168,6 +175,31 @@ def test_proxy_error_status(monkeypatch, tmp_path):
assert contents[4] == "last_error: NR-APM-007\n"


def test_multiple_activations_running_threads(monkeypatch, tmp_path):
    """Check which threads are alive after activating two application sessions.

    Agent control health checking is switched on through environment
    variables, the configuration is reset and ``initialize()`` is called
    again, two ``Application`` objects are created and their sessions are
    activated, and the result of ``threading.enumerate()`` is inspected.
    """
    # Setup expected env vars to run agent control health check.
    # monkeypatch.setenv() expects str values; a non-str value is only
    # converted implicitly and triggers a PytestWarning, so pass the string.
    monkeypatch.setenv("NEW_RELIC_AGENT_CONTROL_ENABLED", "True")
    file_path = tmp_path.as_uri()
    monkeypatch.setenv("NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION", file_path)

    _reset_configuration_done()
    initialize()

    application_1 = Application("Test App 1")
    application_2 = Application("Test App 2")

    application_1.activate_session()
    application_2.activate_session()

    running_threads = threading.enumerate()

    # 6 threads expected: One main agent thread, two active session threads, one main health check thread, and two
    # active session health threads
    assert len(running_threads) == 6
    # NOTE(review): the positional checks below depend on the order in which
    # threading.enumerate() lists threads -- presumably start order; confirm
    # this is stable across the supported interpreters.
    assert running_threads[1].name == "Agent-Control-Health-Main-Thread"
    assert running_threads[2].name == "Agent-Control-Health-Session-Thread"
    assert running_threads[4].name == "Agent-Control-Health-Session-Thread"


def test_update_to_healthy(monkeypatch, tmp_path):
# Setup expected env vars to run agent control health check
monkeypatch.setenv("NEW_RELIC_AGENT_CONTROL_ENABLED", True)
Expand Down Expand Up @@ -200,26 +232,25 @@ def test_update_to_healthy(monkeypatch, tmp_path):
assert contents[1] == "status: Healthy\n"


def test_multiple_activations_running_threads(monkeypatch, tmp_path):
def test_max_app_name_status(monkeypatch, tmp_path):
# Setup expected env vars to run agent control health check
monkeypatch.setenv("NEW_RELIC_AGENT_CONTROL_ENABLED", True)
file_path = tmp_path.as_uri()
monkeypatch.setenv("NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION", file_path)

_reset_configuration_done()
initialize()

application_1 = Application("Test App 1")
application_2 = Application("Test App 2")
initialize_agent(app_name="test1;test2;test3;test4")
# Give time for the scheduler to kick in and write to the health file
time.sleep(5)

application_1.activate_session()
application_2.activate_session()
contents = get_health_file_contents(tmp_path)

running_threads = threading.enumerate()
# Assert on contents of health file
assert len(contents) == 5
assert contents[0] == "healthy: False\n"
assert contents[1] == "status: The maximum number of configured app names (3) exceeded\n"
assert contents[4] == "last_error: NR-APM-006\n"

# 6 threads expected: One main agent thread, two active session threads, one main health check thread, and two
# active session health threads
assert len(running_threads) == 6
assert running_threads[1].name == "Agent-Control-Health-Main-Thread"
assert running_threads[2].name == "Agent-Control-Health-Session-Thread"
assert running_threads[4].name == "Agent-Control-Health-Session-Thread"
# Set app name back to the originally specified name
settings = global_settings()
settings.app_name = "Python Agent Test (agent_features)"